From 130be1d24c343267aa2c5ed69f6eb23aeffdec4e Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Thu, 11 Jan 2024 23:46:37 +0800
Subject: [PATCH 01/17] Move model helper functions from pipelines to
EfficiencyMixin
---
src/diffusers/__init__.py | 2 +
src/diffusers/pipelines/__init__.py | 2 +
src/diffusers/pipelines/pipeline_utils.py | 120 ++++++++++++++++++
.../pipeline_stable_diffusion.py | 117 -----------------
4 files changed, 124 insertions(+), 117 deletions(-)
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index b879f7d3536d..5e276845a85b 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -121,6 +121,7 @@
"DDPMPipeline",
"DiffusionPipeline",
"DiTPipeline",
+ "EfficiencyMixin",
"ImagePipelineOutput",
"KarrasVePipeline",
"LDMPipeline",
@@ -505,6 +506,7 @@
DDPMPipeline,
DiffusionPipeline,
DiTPipeline,
+ EfficiencyMixin,
ImagePipelineOutput,
KarrasVePipeline,
LDMPipeline,
diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
index 1bf41aeaf0df..8ccade3aa228 100644
--- a/src/diffusers/pipelines/__init__.py
+++ b/src/diffusers/pipelines/__init__.py
@@ -48,6 +48,7 @@
_import_structure["pipeline_utils"] = [
"AudioPipelineOutput",
"DiffusionPipeline",
+ "EfficiencyMixin",
"ImagePipelineOutput",
]
_import_structure["deprecated"].extend(
@@ -328,6 +329,7 @@
from .pipeline_utils import (
AudioPipelineOutput,
DiffusionPipeline,
+ EfficiencyMixin,
ImagePipelineOutput,
)
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index 18a4b5cb346b..c2947e3f8dfe 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -42,6 +42,8 @@
from .. import __version__
from ..configuration_utils import ConfigMixin
+from ..models import AutoencoderKL
+from ..models.attention_processor import FusedAttnProcessor2_0
from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
from ..utils import (
@@ -2094,3 +2096,121 @@ def set_attention_slice(self, slice_size: Optional[int]):
for module in modules:
module.set_attention_slice(slice_size)
+
+class EfficiencyMixin:
+ r"""
+ Helper mixin for `DiffusionPipeline` subclasses that hold a `vae` and a `unet` (mainly the Stable Diffusion family).
+ """
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
+ def enable_vae_tiling(self):
+ r"""
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+ compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
+ processing larger images.
+ """
+ self.vae.enable_tiling()
+
+ def disable_vae_tiling(self):
+ r"""
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_tiling()
+
+ def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
+ r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
+
+ The suffixes after the scaling factors represent the stages where they are being applied.
+
+ Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
+ that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
+
+ Args:
+ s1 (`float`):
+ Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
+ mitigate "oversmoothing effect" in the enhanced denoising process.
+ s2 (`float`):
+ Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
+ mitigate "oversmoothing effect" in the enhanced denoising process.
+ b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
+ b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
+ """
+ if not hasattr(self, "unet"):
+ raise ValueError("The pipeline must have `unet` for using FreeU.")
+ self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
+
+ def disable_freeu(self):
+ """Disables the FreeU mechanism if enabled."""
+ self.unet.disable_freeu()
+
+ def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+ """
+ Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+ key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+ <Tip warning={true}>
+
+ This API is 🧪 experimental.
+
+ </Tip>
+
+ Args:
+ unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+ vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+ """
+ self.fusing_unet = False
+ self.fusing_vae = False
+
+ if unet:
+ self.fusing_unet = True
+ self.unet.fuse_qkv_projections()
+ self.unet.set_attn_processor(FusedAttnProcessor2_0())
+
+ if vae:
+ if not isinstance(self.vae, AutoencoderKL):
+ raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
+
+ self.fusing_vae = True
+ self.vae.fuse_qkv_projections()
+ self.vae.set_attn_processor(FusedAttnProcessor2_0())
+
+ def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+ """Disable QKV projection fusion if enabled.
+
+ <Tip warning={true}>
+
+ This API is 🧪 experimental.
+
+ </Tip>
+
+ Args:
+ unet (`bool`, defaults to `True`): To unfuse QKV projections on the UNet.
+ vae (`bool`, defaults to `True`): To unfuse QKV projections on the VAE.
+
+ """
+ if unet:
+ if not self.fusing_unet:
+ logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
+ else:
+ self.unet.unfuse_qkv_projections()
+ self.fusing_unet = False
+
+ if vae:
+ if not self.fusing_vae:
+ logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
+ else:
+ self.vae.unfuse_qkv_projections()
+ self.fusing_vae = False
\ No newline at end of file
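For reviewers, a minimal usage sketch of the consolidated helpers, assuming this patch is applied. The checkpoint id is illustrative, and the FreeU values are the SD1.x suggestions from the official FreeU repository, not part of this patch:

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Memory savers now provided by EfficiencyMixin: decode the latents in
# slices (per batch item) or tiles (per spatial region).
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()

# FreeU (https://arxiv.org/abs/2309.11497); values are SD1.x suggestions.
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.5, b2=1.6)

# Experimental fused QKV projections on both the UNet and the VAE.
pipe.fuse_qkv_projections(unet=True, vae=True)

image = pipe("an astronaut riding a horse").images[0]

# Every toggle is symmetric and can be rolled back.
pipe.unfuse_qkv_projections()
pipe.disable_freeu()
pipe.disable_vae_tiling()
pipe.disable_vae_slicing()
```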
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index a62b050afe92..5249e897ce85 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -23,7 +23,6 @@
from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.attention_processor import FusedAttnProcessor2_0
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import (
@@ -248,35 +247,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def _encode_prompt(
self,
prompt,
@@ -666,93 +636,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
- <Tip warning={true}>
-
- This API is 🧪 experimental.
-
- </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
- <Tip warning={true}>
-
- This API is 🧪 experimental.
-
- </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
From ec74982b7ed08ed75443576d4642ae6b4eade78c Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Fri, 12 Jan 2024 01:31:07 +0800
Subject: [PATCH 02/17] Deduplicate functions replaced by EfficiencyMixin
---
...p_guided_images_mixing_stable_diffusion.py | 10 --
.../community/clip_guided_stable_diffusion.py | 10 --
.../clip_guided_stable_diffusion_img2img.py | 10 --
.../community/composable_stable_diffusion.py | 61 +-------
examples/community/gluegen.py | 58 +-------
examples/community/imagic_stable_diffusion.py | 25 ----
examples/community/img2img_inpainting.py | 27 ----
.../community/interpolate_stable_diffusion.py | 27 ----
.../latent_consistency_interpolate.py | 65 +--------
examples/community/llm_grounded_diffusion.py | 69 +--------
examples/community/lpw_stable_diffusion.py | 111 +--------------
examples/community/lpw_stable_diffusion_xl.py | 131 +-----------------
.../multilingual_stable_diffusion.py | 27 ----
.../pipeline_animatediff_controlnet.py | 67 +--------
.../community/pipeline_demofusion_sdxl.py | 39 +-----
.../community/pipeline_sdxl_style_aligned.py | 125 +----------------
..._stable_diffusion_xl_controlnet_adapter.py | 69 +--------
...diffusion_xl_controlnet_adapter_inpaint.py | 66 +--------
examples/community/pipeline_zero1to3.py | 111 +--------------
examples/community/sd_text2img_k_diffusion.py | 68 +--------
.../community/seed_resize_stable_diffusion.py | 30 +---
.../community/speech_to_image_diffusion.py | 11 +-
.../community/stable_diffusion_comparison.py | 28 +---
.../stable_diffusion_controlnet_img2img.py | 85 ------------
.../stable_diffusion_controlnet_inpaint.py | 85 ------------
...le_diffusion_controlnet_inpaint_img2img.py | 85 ------------
examples/community/stable_diffusion_ipex.py | 109 +--------------
examples/community/stable_diffusion_mega.py | 30 +---
.../community/stable_diffusion_repaint.py | 79 +----------
examples/community/text_inpainting.py | 68 +--------
.../community/unclip_image_interpolation.py | 46 +-----
.../community/unclip_text_interpolation.py | 47 +------
.../controlnetxs/pipeline_controlnet_xs.py | 65 +--------
.../pipeline_controlnet_xs_sd_xl.py | 69 +--------
.../research_projects/rdm/pipeline_rdm.py | 119 +---------------
.../animatediff/pipeline_animatediff.py | 66 +--------
.../pipelines/audioldm/pipeline_audioldm.py | 20 +--
.../pipelines/audioldm2/pipeline_audioldm2.py | 16 ---
.../controlnet/pipeline_controlnet.py | 70 +---------
.../controlnet/pipeline_controlnet_img2img.py | 70 +---------
.../controlnet/pipeline_controlnet_inpaint.py | 70 +---------
.../pipeline_controlnet_inpaint_sd_xl.py | 65 +--------
.../controlnet/pipeline_controlnet_sd_xl.py | 64 +--------
.../pipeline_controlnet_sd_xl_img2img.py | 65 +--------
.../alt_diffusion/pipeline_alt_diffusion.py | 124 +----------------
.../pipeline_alt_diffusion_img2img.py | 95 +------------
...pipeline_stable_diffusion_model_editing.py | 22 +--
.../pipeline_stable_diffusion_paradigms.py | 37 +----
.../pipeline_latent_consistency_img2img.py | 70 +---------
.../pipeline_latent_consistency_text2img.py | 70 +---------
.../pipelines/musicldm/pipeline_musicldm.py | 20 +--
.../pipeline_paint_by_example.py | 4 +-
src/diffusers/pipelines/pipeline_utils.py | 4 +-
.../pipeline_semantic_stable_diffusion.py | 4 +-
.../pipeline_stable_diffusion.py | 4 +-
...peline_stable_diffusion_image_variation.py | 32 +----
.../pipeline_stable_diffusion_img2img.py | 99 +------------
.../pipeline_stable_diffusion_inpaint.py | 99 +------------
...eline_stable_diffusion_instruct_pix2pix.py | 32 +----
...ipeline_stable_diffusion_latent_upscale.py | 32 +----
.../pipeline_stable_diffusion_upscale.py | 32 +----
.../pipeline_stable_unclip.py | 20 +--
.../pipeline_stable_unclip_img2img.py | 20 +--
...line_stable_diffusion_attend_and_excite.py | 20 +--
.../pipeline_stable_diffusion_diffedit.py | 39 +-----
.../pipeline_stable_diffusion_gligen.py | 33 +----
...line_stable_diffusion_gligen_text_image.py | 33 +----
.../pipeline_stable_diffusion_k_diffusion.py | 4 +-
...ipeline_stable_diffusion_xl_k_diffusion.py | 124 +----------------
.../pipeline_stable_diffusion_ldm3d.py | 42 +-----
.../pipeline_stable_diffusion_panorama.py | 22 +--
.../pipeline_stable_diffusion_safe.py | 4 +-
.../pipeline_stable_diffusion_sag.py | 20 +--
.../pipeline_stable_diffusion_xl.py | 123 +---------------
.../pipeline_stable_diffusion_xl_img2img.py | 126 +----------------
.../pipeline_stable_diffusion_xl_inpaint.py | 126 +----------------
...ne_stable_diffusion_xl_instruct_pix2pix.py | 68 +--------
.../pipeline_stable_diffusion_adapter.py | 48 +------
.../pipeline_stable_diffusion_xl_adapter.py | 64 +--------
.../pipeline_text_to_video_synth.py | 65 +--------
.../pipeline_text_to_video_synth_img2img.py | 65 +--------
.../pipeline_text_to_video_zero.py | 4 +-
.../pipeline_text_to_video_zero_sdxl.py | 19 +--
.../unidiffuser/pipeline_unidiffuser.py | 37 +----
84 files changed, 236 insertions(+), 4308 deletions(-)
diff --git a/examples/community/clip_guided_images_mixing_stable_diffusion.py b/examples/community/clip_guided_images_mixing_stable_diffusion.py
index 399f5b14506d..6fcbb16963b8 100644
--- a/examples/community/clip_guided_images_mixing_stable_diffusion.py
+++ b/examples/community/clip_guided_images_mixing_stable_diffusion.py
@@ -113,16 +113,6 @@ def __init__(
set_requires_grad(self.text_encoder, False)
set_requires_grad(self.clip_model, False)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- self.enable_attention_slicing(None)
-
def freeze_vae(self):
set_requires_grad(self.vae, False)
diff --git a/examples/community/clip_guided_stable_diffusion.py b/examples/community/clip_guided_stable_diffusion.py
index 3f4ab2ab9f4a..9065462940c2 100644
--- a/examples/community/clip_guided_stable_diffusion.py
+++ b/examples/community/clip_guided_stable_diffusion.py
@@ -89,16 +89,6 @@ def __init__(
set_requires_grad(self.text_encoder, False)
set_requires_grad(self.clip_model, False)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- self.enable_attention_slicing(None)
-
def freeze_vae(self):
set_requires_grad(self.vae, False)
diff --git a/examples/community/clip_guided_stable_diffusion_img2img.py b/examples/community/clip_guided_stable_diffusion_img2img.py
index 2dbc9bef9ffe..83e117f02dd2 100644
--- a/examples/community/clip_guided_stable_diffusion_img2img.py
+++ b/examples/community/clip_guided_stable_diffusion_img2img.py
@@ -163,16 +163,6 @@ def __init__(
set_requires_grad(self.text_encoder, False)
set_requires_grad(self.clip_model, False)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- self.enable_attention_slicing(None)
-
def freeze_vae(self):
set_requires_grad(self.vae, False)
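The three CLIP-guided pipelines above drop their local copies of `enable_attention_slicing`; the method inherited from `DiffusionPipeline` covers the same interface. A sketch (checkpoint id illustrative):

```python
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

pipe.enable_attention_slicing("auto")  # "auto" halves attention_head_dim
pipe.enable_attention_slicing(4)       # an int n gives attention_head_dim // n slices
pipe.disable_attention_slicing()       # back to single-step attention
```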
diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py
index 2693ae45afac..eb099f9398b9 100644
--- a/examples/community/composable_stable_diffusion.py
+++ b/examples/community/composable_stable_diffusion.py
@@ -22,6 +22,7 @@
from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import (
@@ -32,13 +33,13 @@
LMSDiscreteScheduler,
PNDMScheduler,
)
-from diffusers.utils import deprecate, is_accelerate_available, logging
+from diffusers.utils import deprecate, logging
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class ComposableStableDiffusionPipeline(DiffusionPipeline):
+class ComposableStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
@@ -164,62 +165,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
- if cpu_offloaded_model is not None:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- # TODO(Patrick) - there is currently a bug with cpu offload of nn.Parameter in accelerate
- # fix by only offloading self.safety_checker for now
- cpu_offload(self.safety_checker.vision_model, device)
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
r"""
Encodes the prompt into text encoder hidden states.
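The composable pipeline above now relies on the `enable_sequential_cpu_offload` and `_execution_device` implementations inherited from `DiffusionPipeline`. A usage sketch (checkpoint id illustrative; requires `accelerate`):

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="composable_stable_diffusion",
    torch_dtype=torch.float16,
)

# Submodule-level offload from the base class; _execution_device is then
# resolved from accelerate's hooks instead of the deleted local property.
pipe.enable_sequential_cpu_offload()

image = pipe("mystical trees | a magical pond", guidance_scale=7.5).images[0]
```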
diff --git a/examples/community/gluegen.py b/examples/community/gluegen.py
index ecfe91eb9483..19cbf6cb3b82 100644
--- a/examples/community/gluegen.py
+++ b/examples/community/gluegen.py
@@ -10,6 +10,7 @@
from diffusers.loaders import LoraLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -193,7 +194,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
-class GlueGenStableDiffusionPipeline(DiffusionPipeline, LoraLoaderMixin):
+class GlueGenStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, LoraLoaderMixin):
def __init__(
self,
vae: AutoencoderKL,
@@ -241,35 +242,6 @@ def load_language_adapter(
)
self.language_adapter.load_state_dict(torch.load(model_path))
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def _adapt_language(self, prompt_embeds: torch.FloatTensor):
prompt_embeds = prompt_embeds / 3
prompt_embeds = self.language_adapter(prompt_embeds) * (self.tensor_norm / 2)
@@ -544,32 +516,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/examples/community/imagic_stable_diffusion.py b/examples/community/imagic_stable_diffusion.py
index acd09c7e0bf4..0f744c1557fd 100644
--- a/examples/community/imagic_stable_diffusion.py
+++ b/examples/community/imagic_stable_diffusion.py
@@ -105,31 +105,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
def train(
self,
prompt: Union[str, List[str]],
diff --git a/examples/community/img2img_inpainting.py b/examples/community/img2img_inpainting.py
index 8ee8355d49a6..71dc3cf712ed 100644
--- a/examples/community/img2img_inpainting.py
+++ b/examples/community/img2img_inpainting.py
@@ -129,33 +129,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def __call__(
self,
diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py
index 70e4d025a037..4c13e0046b9a 100644
--- a/examples/community/interpolate_stable_diffusion.py
+++ b/examples/community/interpolate_stable_diffusion.py
@@ -120,33 +120,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def __call__(
self,
diff --git a/examples/community/latent_consistency_interpolate.py b/examples/community/latent_consistency_interpolate.py
index 7b9e4806bf44..a70fb6a467f5 100644
--- a/examples/community/latent_consistency_interpolate.py
+++ b/examples/community/latent_consistency_interpolate.py
@@ -9,7 +9,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import LCMScheduler
from diffusers.utils import (
@@ -190,7 +190,7 @@ def slerp(
class LatentConsistencyModelWalkPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using a latent consistency model.
@@ -273,67 +273,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
def encode_prompt(
self,
diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py
index d815b4ea8e42..39d530e09b1c 100644
--- a/examples/community/llm_grounded_diffusion.py
+++ b/examples/community/llm_grounded_diffusion.py
@@ -35,6 +35,7 @@
from diffusers.models.attention_processor import AttnProcessor2_0
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -267,7 +268,12 @@ def __call__(
class LLMGroundedDiffusionPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for layout-grounded text-to-image generation using LLM-grounded Diffusion (LMD+): https://arxiv.org/pdf/2305.13655.pdf.
@@ -1180,39 +1186,6 @@ def latent_lmd_guidance(
# Below are methods copied from StableDiffusionPipeline
# The design choice of not inheriting from StableDiffusionPipeline is discussed here: https://github.com/huggingface/diffusers/pull/5993#issuecomment-1834258517
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -1522,34 +1495,6 @@ def prepare_latents(
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py
index 7249e033186f..debaef7d4642 100644
--- a/examples/community/lpw_stable_diffusion.py
+++ b/examples/community/lpw_stable_diffusion.py
@@ -13,13 +13,12 @@
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
PIL_INTERPOLATION,
deprecate,
- is_accelerate_available,
- is_accelerate_version,
logging,
)
from diffusers.utils.torch_utils import randn_tensor
@@ -410,7 +409,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8):
class StableDiffusionLongPromptWeightingPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion without tokens length limit, and support parsing
@@ -534,112 +533,6 @@ def __init__(
requires_safety_checker=requires_safety_checker,
)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in
- several steps. This is useful to save a large amount of memory and to allow the processing of larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
- from accelerate import cpu_offload
- else:
- raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(
self,
prompt,
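`lpw_stable_diffusion` keeps its long-prompt weighting but now inherits the offload helpers from `DiffusionPipeline`. A sketch (checkpoint id illustrative; requires `accelerate`):

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="lpw_stable_diffusion",
    torch_dtype=torch.float16,
)

# Whole-model offload from the base class replaces the deleted local copy.
pipe.enable_model_cpu_offload()

image = pipe.text2img(
    "a ((beautiful)) landscape, masterpiece", num_inference_steps=30
).images[0]
```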
diff --git a/examples/community/lpw_stable_diffusion_xl.py b/examples/community/lpw_stable_diffusion_xl.py
index 479c76bbdc56..83f0e8b7818d 100644
--- a/examples/community/lpw_stable_diffusion_xl.py
+++ b/examples/community/lpw_stable_diffusion_xl.py
@@ -26,11 +26,11 @@
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from diffusers.models.attention_processor import (
AttnProcessor2_0,
- FusedAttnProcessor2_0,
LoRAAttnProcessor2_0,
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
)
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -545,7 +545,12 @@ def retrieve_timesteps(
class SDXLLongPromptWeightingPipeline(
- DiffusionPipeline, FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ FromSingleFileMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ TextualInversionLoaderMixin,
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
@@ -649,39 +654,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def enable_model_cpu_offload(self, gpu_id=0):
r"""
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -1030,95 +1002,6 @@ def check_inputs(
"If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
- <Tip warning={true}>
-
- This API is 🧪 experimental.
-
- </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
- <Tip warning={true}>
-
- This API is 🧪 experimental.
-
- </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None):
# get the original timestep using init_timestep
if denoising_start is None:
diff --git a/examples/community/multilingual_stable_diffusion.py b/examples/community/multilingual_stable_diffusion.py
index 7597efd215af..0a3b49a14d7d 100644
--- a/examples/community/multilingual_stable_diffusion.py
+++ b/examples/community/multilingual_stable_diffusion.py
@@ -135,33 +135,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def __call__(
self,
diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py
index 1285e7c97a9b..dc7b6302c5ea 100644
--- a/examples/community/pipeline_animatediff_controlnet.py
+++ b/examples/community/pipeline_animatediff_controlnet.py
@@ -28,7 +28,7 @@
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unets.unet_motion_model import MotionAdapter
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
@@ -111,7 +111,9 @@ class AnimateDiffControlNetPipelineOutput(BaseOutput):
frames: Union[torch.Tensor, np.ndarray]
-class AnimateDiffControlNetPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
+class AnimateDiffControlNetPipeline(
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-to-video generation.
@@ -406,67 +408,6 @@ def decode_latents(self, latents):
video = video.float()
return video
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
def prepare_extra_step_kwargs(self, generator, eta):
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
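FreeU, per the docstring removed above, is a single call with four stage-wise factors. The values below are the SD 1.x combination suggested in the FreeU repository; other model families need different values, so treat them as a starting point:

# b1/b2 amplify backbone features; s1/s2 attenuate skip features to
# counter the oversmoothing the amplification would otherwise cause.
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.5, b2=1.6)
image = pipe("a fantasy landscape").images[0]
pipe.disable_freeu()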
diff --git a/examples/community/pipeline_demofusion_sdxl.py b/examples/community/pipeline_demofusion_sdxl.py
index ab0d3cf9dd29..69624271beed 100644
--- a/examples/community/pipeline_demofusion_sdxl.py
+++ b/examples/community/pipeline_demofusion_sdxl.py
@@ -23,7 +23,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
is_accelerate_available,
@@ -93,7 +93,9 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
return noise_cfg
-class DemoFusionSDXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin):
+class DemoFusionSDXLPipeline(
+ DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
@@ -176,39 +178,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def encode_prompt(
self,
prompt: str,
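The VAE helpers removed from DemoFusion are one-line delegations to the AutoencoderKL, which is all EfficiencyMixin does as well. Typical usage (a sketch, continuing from a loaded `pipe`):

# Slicing decodes a batch one image at a time (larger batches);
# tiling decodes each image in tiles (larger resolutions).
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()
images = pipe("a mural, highly detailed", num_images_per_prompt=4).images
pipe.disable_vae_slicing()
pipe.disable_vae_tiling()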
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py
index fa54b542c5ca..2b6047e97bfb 100644
--- a/examples/community/pipeline_sdxl_style_aligned.py
+++ b/examples/community/pipeline_sdxl_style_aligned.py
@@ -51,7 +51,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -85,7 +85,7 @@
>>> from typing import List
>>> import torch
- >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+ >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
>>> from PIL import Image
>>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo"
@@ -389,6 +389,7 @@ def retrieve_latents(
class StyleAlignedSDXLPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
@@ -504,39 +505,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def encode_prompt(
self,
prompt: str,
@@ -1187,34 +1155,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
def _enable_shared_attention_processors(
self,
share_attention: bool,
@@ -1361,65 +1301,6 @@ def disable_style_aligned(self):
self._style_aligned_norm_layers = None
self._disable_shared_attention_processors()
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
index e1437bee7a15..490fde58b916 100644
--- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
+++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
@@ -33,7 +33,7 @@
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -158,7 +158,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetAdapterPipeline(
- DiffusionPipeline, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ FromSingleFileMixin,
+ StableDiffusionXLLoraLoaderMixin,
+ TextualInversionLoaderMixin,
):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
@@ -234,39 +238,6 @@ def __init__(
)
self.default_sample_size = self.unet.config.sample_size
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -863,34 +834,6 @@ def _default_height_width(self, height, width, image):
return height, width
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
def prepare_control_image(
self,
image,
diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
index e2a5fec29faf..17777090df24 100644
--- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
+++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
@@ -52,6 +52,7 @@
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -303,7 +304,9 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
return noise_cfg
-class StableDiffusionXLControlNetAdapterInpaintPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin):
+class StableDiffusionXLControlNetAdapterInpaintPipeline(
+ DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
https://arxiv.org/abs/2302.08453
@@ -383,39 +386,6 @@ def __init__(
)
self.default_sample_size = self.unet.config.sample_size
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -1207,34 +1177,6 @@ def _default_height_width(self, height, width, image):
return height, width
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
def prepare_control_image(
self,
image,
diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py
index 600cf2dc1b63..6e1c1d015e48 100644
--- a/examples/community/pipeline_zero1to3.py
+++ b/examples/community/pipeline_zero1to3.py
@@ -22,18 +22,16 @@
# randn_tensor,
# replace_example_docstring,
# )
-# from ..pipeline_utils import DiffusionPipeline
+# from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
# from . import StableDiffusionPipelineOutput
# from .safety_checker import StableDiffusionSafetyChecker
-from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
+from diffusers import AutoencoderKL, DiffusionPipeline, EfficiencyMixin, UNet2DConditionModel
from diffusers.configuration_utils import ConfigMixin, FrozenDict
from diffusers.models.modeling_utils import ModelMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
deprecate,
- is_accelerate_available,
- is_accelerate_version,
logging,
replace_example_docstring,
)
@@ -68,7 +66,7 @@ def forward(self, x):
return self.projection(x)
-class Zero1to3StableDiffusionPipeline(DiffusionPipeline):
+class Zero1to3StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for single view conditioned novel view generation using Zero1to3.
@@ -187,109 +185,6 @@ def __init__(
self.register_to_config(requires_safety_checker=requires_safety_checker)
# self.model_mode = None
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in
- several steps. This is useful to save a large amount of memory and to allow the processing of larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
- from accelerate import cpu_offload
- else:
- raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(
self,
prompt,
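zero1to3 also drops its hand-rolled accelerate offloading together with the `is_accelerate_*` imports; `DiffusionPipeline` ships equivalent `enable_sequential_cpu_offload` and `enable_model_cpu_offload` implementations, so the public surface is unchanged. Pick one or the other (a sketch):

# Sequential offload: per-submodule hooks, lowest memory, slowest.
pipe.enable_sequential_cpu_offload()

# ...or model offload: moves one whole model at a time, much faster
# while still releasing GPU memory between models.
# pipe.enable_model_cpu_offload()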
diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py
index c6a4bf2ce613..8928eb383b76 100755
--- a/examples/community/sd_text2img_k_diffusion.py
+++ b/examples/community/sd_text2img_k_diffusion.py
@@ -19,9 +19,9 @@
import torch
from k_diffusion.external import CompVisDenoiser, CompVisVDenoiser
-from diffusers import DiffusionPipeline, LMSDiscreteScheduler
+from diffusers import DiffusionPipeline, EfficiencyMixin, LMSDiscreteScheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
-from diffusers.utils import is_accelerate_available, logging
+from diffusers.utils import logging
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -41,7 +41,7 @@ def apply_model(self, *args, **kwargs):
return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample
-class StableDiffusionPipeline(DiffusionPipeline):
+class StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
@@ -120,68 +120,6 @@ def set_scheduler(self, scheduler_type: str):
sampling = getattr(library, "sampling")
self.sampler = getattr(sampling, scheduler_type)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]:
- if cpu_offloaded_model is not None:
- cpu_offload(cpu_offloaded_model, device)
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
r"""
Encodes the prompt into text encoder hidden states.
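The `_execution_device` property removed above duplicated base-class logic: once offload hooks are installed, `pipe.device` may report "meta" or "cpu", and the real device must be read off an accelerate hook. A standalone sketch of that resolution (the function name is illustrative):

import torch

def resolve_execution_device(unet, fallback):
    # accelerate attaches an `_hf_hook` carrying `execution_device`
    # to offloaded modules; scan the UNet's submodules for one.
    if not hasattr(unet, "_hf_hook"):
        return fallback
    for module in unet.modules():
        hook = getattr(module, "_hf_hook", None)
        if hook is not None and getattr(hook, "execution_device", None) is not None:
            return torch.device(hook.execution_device)
    return fallback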
diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py
index 9318277b8f01..c84c222f2360 100644
--- a/examples/community/seed_resize_stable_diffusion.py
+++ b/examples/community/seed_resize_stable_diffusion.py
@@ -9,6 +9,7 @@
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -18,7 +19,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SeedResizeStableDiffusionPipeline(DiffusionPipeline):
+class SeedResizeStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
@@ -67,33 +68,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def __call__(
self,
diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py
index 63bcfb662517..7f0bf4bf0293 100644
--- a/examples/community/speech_to_image_diffusion.py
+++ b/examples/community/speech_to_image_diffusion.py
@@ -18,6 +18,7 @@
PNDMScheduler,
UNet2DConditionModel,
)
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import logging
@@ -26,7 +27,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SpeechToImagePipeline(DiffusionPipeline):
+class SpeechToImagePipeline(DiffusionPipeline, EfficiencyMixin):
def __init__(
self,
speech_model: WhisperForConditionalGeneration,
@@ -62,14 +63,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- if slice_size == "auto":
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def __call__(
self,
diff --git a/examples/community/stable_diffusion_comparison.py b/examples/community/stable_diffusion_comparison.py
index 7997a0cc0186..49c9ffa86a61 100644
--- a/examples/community/stable_diffusion_comparison.py
+++ b/examples/community/stable_diffusion_comparison.py
@@ -12,6 +12,7 @@
StableDiffusionPipeline,
UNet2DConditionModel,
)
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -22,7 +23,7 @@
pipe4_model_id = "CompVis/stable-diffusion-v1-4"
-class StableDiffusionComparisonPipeline(DiffusionPipeline):
+class StableDiffusionComparisonPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for parallel comparison of Stable Diffusion v1-v4
This pipeline inherits from DiffusionPipeline and depends on the use of an Auth Token for
@@ -83,31 +84,6 @@ def __init__(
def layers(self) -> Dict[str, Any]:
return {k: getattr(self, k) for k in self.config.keys() if not k.startswith("_")}
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def text2img_sd1_1(
self,
diff --git a/examples/community/stable_diffusion_controlnet_img2img.py b/examples/community/stable_diffusion_controlnet_img2img.py
index a2b92fff0fb5..f961c767e416 100644
--- a/examples/community/stable_diffusion_controlnet_img2img.py
+++ b/examples/community/stable_diffusion_controlnet_img2img.py
@@ -14,8 +14,6 @@
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
PIL_INTERPOLATION,
- is_accelerate_available,
- is_accelerate_version,
replace_example_docstring,
)
from diffusers.utils.torch_utils import randn_tensor
@@ -183,89 +181,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- # the safety checker can offload the vae again
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # control net hook has be manually offloaded as it alternates with unet
- cpu_offload_with_hook(self.controlnet, device)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(
self,
prompt,
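The three controlnet community pipelines remove the same offload code, with the twist noted in the deleted comment: the ControlNet alternates with the UNet during denoising, so it is hooked outside the sequential chain. A sketch of that chain with accelerate (assumes accelerate >= 0.17 and a loaded `pipe`):

import torch
from accelerate import cpu_offload_with_hook

device = torch.device("cuda:0")
hook = None
for model in [pipe.text_encoder, pipe.unet, pipe.vae]:
    _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)
# The ControlNet gets its own hook instead of joining the chain,
# because it runs interleaved with the UNet.
cpu_offload_with_hook(pipe.controlnet, device)
pipe.final_offload_hook = hook  # the last model is offloaded manually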
diff --git a/examples/community/stable_diffusion_controlnet_inpaint.py b/examples/community/stable_diffusion_controlnet_inpaint.py
index b87973366418..76e6e331abcb 100644
--- a/examples/community/stable_diffusion_controlnet_inpaint.py
+++ b/examples/community/stable_diffusion_controlnet_inpaint.py
@@ -15,8 +15,6 @@
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
PIL_INTERPOLATION,
- is_accelerate_available,
- is_accelerate_version,
replace_example_docstring,
)
from diffusers.utils.torch_utils import randn_tensor
@@ -282,89 +280,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- # the safety checker can offload the vae again
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # control net hook has be manually offloaded as it alternates with unet
- cpu_offload_with_hook(self.controlnet, device)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(
self,
prompt,
diff --git a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
index 96ad3c39239d..34b8170f66c8 100644
--- a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
+++ b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
@@ -14,8 +14,6 @@
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
PIL_INTERPOLATION,
- is_accelerate_available,
- is_accelerate_version,
replace_example_docstring,
)
from diffusers.utils.torch_utils import randn_tensor
@@ -267,89 +265,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- # the safety checker can offload the vae again
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # control net hook has be manually offloaded as it alternates with unet
- cpu_offload_with_hook(self.controlnet, device)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(
self,
prompt,
diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py
index bf58cc8453a1..48048d5831f4 100644
--- a/examples/community/stable_diffusion_ipex.py
+++ b/examples/community/stable_diffusion_ipex.py
@@ -23,14 +23,12 @@
from diffusers.configuration_utils import FrozenDict
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
deprecate,
- is_accelerate_available,
- is_accelerate_version,
logging,
replace_example_docstring,
)
@@ -62,7 +60,7 @@
"""
-class StableDiffusionIPEXPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionIPEXPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion on IPEX.
@@ -304,109 +302,6 @@ def prepare_for_ipex(self, promt, dtype=torch.float32, height=None, width=None,
ave_decoder_trace_model = torch.jit.freeze(ave_decoder_trace_model)
self.vae.decoder.forward = ave_decoder_trace_model.forward
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in
- several steps. This is useful to save a large amount of memory and to allow the processing of larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
- from accelerate import cpu_offload
- else:
- raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(
self,
prompt,
diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py
index faed00b49d40..470cbab5a527 100644
--- a/examples/community/stable_diffusion_mega.py
+++ b/examples/community/stable_diffusion_mega.py
@@ -16,6 +16,7 @@
UNet2DConditionModel,
)
from diffusers.configuration_utils import FrozenDict
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import deprecate, logging
@@ -23,7 +24,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionMegaPipeline(DiffusionPipeline):
+class StableDiffusionMegaPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
@@ -94,33 +95,6 @@ def __init__(
def components(self) -> Dict[str, Any]:
return {k: getattr(self, k) for k in self.config.keys() if not k.startswith("_")}
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
@torch.no_grad()
def inpaint(
self,
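
Deleting the local attention-slicing helper is behavior-preserving only if an equivalent lives on the shared base, which `DiffusionPipeline` already provides. A standalone restatement of the `slice_size` resolution (the function name is hypothetical; the divisibility check mirrors the requirement stated in the deleted docstring):

    def resolve_slice_size(attention_head_dim: int, slice_size="auto") -> int:
        # "auto" halves the head dimension so attention runs in two steps,
        # usually a good memory/speed trade-off.
        if slice_size == "auto":
            return attention_head_dim // 2
        if attention_head_dim % slice_size != 0:
            raise ValueError("attention_head_dim must be a multiple of slice_size")
        return slice_size

    resolve_slice_size(8)     # -> 4 (attention computed in two steps)
    resolve_slice_size(8, 2)  # -> 2 (four smaller steps, less memory)
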
diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py
index db2de0897570..38400b578f97 100644
--- a/examples/community/stable_diffusion_repaint.py
+++ b/examples/community/stable_diffusion_repaint.py
@@ -24,14 +24,13 @@
from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.configuration_utils import FrozenDict, deprecate
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
)
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
- is_accelerate_available,
- is_accelerate_version,
logging,
)
from diffusers.utils.torch_utils import randn_tensor
@@ -140,7 +139,7 @@ def prepare_mask_and_masked_image(image, mask):
return mask, masked_image
-class StableDiffusionRepaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionRepaintPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
@@ -276,80 +275,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- Note that offloading happens on a submodule basis. Memory savings are higher than with
- `enable_model_cpu_offload`, but performance is lower.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
- from accelerate import cpu_offload
- else:
- raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
- cpu_offload(cpu_offloaded_model, device)
-
- if self.safety_checker is not None:
- cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
- def enable_model_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
- to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
- method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
- `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
- """
- if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
- from accelerate import cpu_offload_with_hook
- else:
- raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- if self.device.type != "cpu":
- self.to("cpu", silence_dtype_warnings=True)
- torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
-
- hook = None
- for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
- _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
- if self.safety_checker is not None:
- _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
- # We'll offload the last model manually.
- self.final_offload_hook = hook
-
- @property
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
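
The migration pattern for repaint, repeated across every file in this patch, is two moves: add `EfficiencyMixin` to the base list and delete the locally duplicated helpers. Schematically (a sketch, not a complete pipeline definition):

    from diffusers import DiffusionPipeline
    from diffusers.pipelines.pipeline_utils import EfficiencyMixin

    class MyVaeUnetPipeline(DiffusionPipeline, EfficiencyMixin):
        # enable_vae_slicing / enable_vae_tiling / enable_freeu and friends
        # now resolve to the single shared implementation on the mixin.
        ...
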
diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py
index cd02049a4afb..80889d7897bd 100644
--- a/examples/community/text_inpainting.py
+++ b/examples/community/text_inpainting.py
@@ -13,16 +13,17 @@
from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionInpaintPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from diffusers.utils import deprecate, is_accelerate_available, logging
+from diffusers.utils import deprecate, logging
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class TextInpainting(DiffusionPipeline):
+class TextInpainting(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text based inpainting using Stable Diffusion.
Uses CLIPSeg to get a mask from the given text, then calls the Inpainting pipeline with the generated mask
@@ -120,69 +121,6 @@ def __init__(
feature_extractor=feature_extractor,
)
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- slice_size = self.unet.config.attention_head_dim // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
- def enable_sequential_cpu_offload(self):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device("cuda")
-
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]:
- if cpu_offloaded_model is not None:
- cpu_offload(cpu_offloaded_model, device)
-
- @property
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
@torch.no_grad()
def __call__(
self,
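
The deleted `_execution_device` property is worth restating on its own, because it encodes the one subtlety of offloading: once hooks are installed, the pipeline's nominal `device` no longer reflects where compute happens. The same lookup as a standalone function (the name is illustrative):

    import torch

    def infer_execution_device(unet, fallback: torch.device) -> torch.device:
        # accelerate attaches an `_hf_hook` carrying `execution_device`
        # to every module it manages; the first populated hook wins.
        if not hasattr(unet, "_hf_hook"):
            return fallback
        for module in unet.modules():
            hook = getattr(module, "_hf_hook", None)
            if hook is not None and getattr(hook, "execution_device", None) is not None:
                return torch.device(hook.execution_device)
        return fallback
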
diff --git a/examples/community/unclip_image_interpolation.py b/examples/community/unclip_image_interpolation.py
index 95548b152c07..e3bb44e5030b 100644
--- a/examples/community/unclip_image_interpolation.py
+++ b/examples/community/unclip_image_interpolation.py
@@ -19,7 +19,7 @@
UNet2DModel,
)
from diffusers.pipelines.unclip import UnCLIPTextProjModel
-from diffusers.utils import is_accelerate_available, logging
+from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor
@@ -204,50 +204,6 @@ def _encode_image(self, image, device, num_images_per_prompt, image_embeddings:
return image_embeddings
- # Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.enable_sequential_cpu_offload
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's
- models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only
- when their specific submodule has its `forward` method called.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- models = [
- self.decoder,
- self.text_proj,
- self.text_encoder,
- self.super_res_first,
- self.super_res_last,
- ]
- for cpu_offloaded_model in models:
- if cpu_offloaded_model is not None:
- cpu_offload(cpu_offloaded_model, device)
-
- @property
- # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._execution_device
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if self.device != torch.device("meta") or not hasattr(self.decoder, "_hf_hook"):
- return self.device
- for module in self.decoder.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
@torch.no_grad()
def __call__(
self,
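
For unCLIP-style pipelines, which have no unet/vae/text_encoder trio, the deleted method shows the general recipe: offload every component that exists and skip the optional ones. The same loop as a reusable sketch (assumes `accelerate` is installed; the helper name is not part of this patch):

    import torch
    from accelerate import cpu_offload

    def offload_components(models, gpu_id: int = 0) -> None:
        device = torch.device(f"cuda:{gpu_id}")
        for model in models:
            if model is not None:  # optional components may be absent
                cpu_offload(model, device)
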
diff --git a/examples/community/unclip_text_interpolation.py b/examples/community/unclip_text_interpolation.py
index 764299433b4c..be6a0858b35e 100644
--- a/examples/community/unclip_text_interpolation.py
+++ b/examples/community/unclip_text_interpolation.py
@@ -15,7 +15,7 @@
UNet2DModel,
)
from diffusers.pipelines.unclip import UnCLIPTextProjModel
-from diffusers.utils import is_accelerate_available, logging
+from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor
@@ -212,51 +212,6 @@ def _encode_prompt(
return prompt_embeds, text_encoder_hidden_states, text_mask
- # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.enable_sequential_cpu_offload
- def enable_sequential_cpu_offload(self, gpu_id=0):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's
- models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only
- when their specific submodule has its `forward` method called.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device(f"cuda:{gpu_id}")
-
- # TODO: self.prior.post_process_latents is not covered by the offload hooks, so it fails if added to the list
- models = [
- self.decoder,
- self.text_proj,
- self.text_encoder,
- self.super_res_first,
- self.super_res_last,
- ]
- for cpu_offloaded_model in models:
- if cpu_offloaded_model is not None:
- cpu_offload(cpu_offloaded_model, device)
-
- @property
- # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._execution_device
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if self.device != torch.device("meta") or not hasattr(self.decoder, "_hf_hook"):
- return self.device
- for module in self.decoder.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
@torch.no_grad()
def __call__(
self,
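
The TODO about `post_process_latents` kept in the deleted block points at a real limitation of hook-based offloading: `cpu_offload` wraps only `forward`, so any other method invoked on an offloaded module runs while its weights may still be off-device. Illustratively (names follow the deleted code; this is a failure-mode sketch, not patch behavior):

    from accelerate import cpu_offload

    cpu_offload(prior, device)                  # hooks prior.forward only
    out = prior(latents)                        # OK: the hook moves weights to `device`
    post = prior.post_process_latents(latents)  # not hooked: weights may still be offloaded
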
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
index 32646c7c7715..63461fadfc14 100644
--- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
+++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
@@ -26,7 +26,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -44,7 +44,7 @@
class StableDiffusionControlNetXSPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance.
@@ -139,39 +139,6 @@ def __init__(
)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -596,34 +563,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
def __call__(
self,
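
Callers of FreeU are untouched by moving `enable_freeu` to the mixin. A usage sketch, continuing from a loaded `pipe`; the factor values are illustrative ones seen in community examples, not something this patch prescribes, so consult the official FreeU repository for tuned combinations:

    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)  # reweight skip/backbone features
    image = pipe(prompt).images[0]
    pipe.disable_freeu()  # back to vanilla denoising
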
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
index b9b390f1c00c..bb2e6ad1dff7 100644
--- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
+++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
@@ -31,7 +31,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -52,7 +52,11 @@
class StableDiffusionXLControlNetXSPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ StableDiffusionXLLoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL with ControlNet-XS guidance.
@@ -145,39 +149,6 @@ def __init__(
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -661,34 +632,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
def __call__(
self,
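
Of the consolidated helpers, VAE tiling is the one most relevant to the SDXL-resolution pipelines in this file. A sketch, assuming a loaded `pipe` and `prompt`, with the pipeline's required conditioning inputs elided:

    pipe.enable_vae_tiling()   # VAE decodes/encodes tile by tile
    image = pipe(prompt, height=1536, width=1536).images[0]
    pipe.disable_vae_tiling()  # single-pass decoding again
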
diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py
index 28b4cacb8319..5398ee2e5331 100644
--- a/examples/research_projects/rdm/pipeline_rdm.py
+++ b/examples/research_projects/rdm/pipeline_rdm.py
@@ -17,10 +17,10 @@
LMSDiscreteScheduler,
PNDMScheduler,
UNet2DConditionModel,
- logging,
)
from diffusers.image_processor import VaeImageProcessor
-from diffusers.utils import is_accelerate_available, randn_tensor
+from diffusers.utils import logging
+from diffusers.utils.torch_utils import randn_tensor
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -81,121 +81,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.retriever = retriever
- def enable_xformers_memory_efficient_attention(self):
- r"""
- Enable memory efficient attention as implemented in xformers.
-
- When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference
- time. Speed up at training time is not guaranteed.
-
- Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
- is used.
- """
- self.unet.set_use_memory_efficient_attention_xformers(True)
-
- def disable_xformers_memory_efficient_attention(self):
- r"""
- Disable memory efficient attention as implemented in xformers.
- """
- self.unet.set_use_memory_efficient_attention_xformers(False)
-
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in
- several steps. This is useful to save a large amount of memory and to allow the processing of larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
- r"""
- Enable sliced attention computation.
-
- When this option is enabled, the attention module will split the input tensor in slices, to compute attention
- in several steps. This is useful to save some memory in exchange for a small speed decrease.
-
- Args:
- slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
- When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
- a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case,
- `attention_head_dim` must be a multiple of `slice_size`.
- """
- if slice_size == "auto":
- # half the attention head size is usually a good trade-off between
- # speed and memory
- if isinstance(self.unet.config.attention_head_dim, int):
- slice_size = self.unet.config.attention_head_dim // 2
- else:
- slice_size = self.unet.config.attention_head_dim[0] // 2
- self.unet.set_attention_slice(slice_size)
-
- def disable_attention_slicing(self):
- r"""
- Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
- back to computing attention in one step.
- """
- # set slice_size = `None` to disable `attention slicing`
- self.enable_attention_slicing(None)
-
- def enable_sequential_cpu_offload(self):
- r"""
- Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
- text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
- `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
- """
- if is_accelerate_available():
- from accelerate import cpu_offload
- else:
- raise ImportError("Please install accelerate via `pip install accelerate`")
-
- device = torch.device("cuda")
-
- for cpu_offloaded_model in [self.unet, self.clip, self.vae]:
- if cpu_offloaded_model is not None:
- cpu_offload(cpu_offloaded_model, device)
-
- @property
- def _execution_device(self):
- r"""
- Returns the device on which the pipeline's models will be executed. After calling
- `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module
- hooks.
- """
- if not hasattr(self.unet, "_hf_hook"):
- return self.device
- for module in self.unet.modules():
- if (
- hasattr(module, "_hf_hook")
- and hasattr(module._hf_hook, "execution_device")
- and module._hf_hook.execution_device is not None
- ):
- return torch.device(module._hf_hook.execution_device)
- return self.device
-
def _encode_prompt(self, prompt):
# get prompt text embeddings
text_inputs = self.tokenizer(
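
The RDM pipeline loses its local xformers toggles as well; equivalent switches already exist on `DiffusionPipeline`, so callers keep working (note, per the deleted docstring, that memory-efficient attention takes precedence when sliced attention is also enabled):

    pipe.enable_xformers_memory_efficient_attention()   # requires xformers to be installed
    # ... run inference with lower attention memory ...
    pipe.disable_xformers_memory_efficient_attention()
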
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
index c46dadb53e6a..cbdab46cb8e4 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@@ -42,7 +42,7 @@
)
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import AnimateDiffPipelineOutput
@@ -87,7 +87,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
class AnimateDiffPipeline(
-    DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
+    DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
):
r"""
Pipeline for text-to-video generation.
@@ -411,66 +411,6 @@ def decode_latents(self, latents):
video = video.float()
return video
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
def prepare_extra_step_kwargs(self, generator, eta):
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
index 438f6736b6a7..96cd554c112a 100644
--- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
+++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
@@ -24,7 +24,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import logging, replace_example_docstring
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
+from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -49,7 +49,7 @@
"""
-class AudioLDMPipeline(DiffusionPipeline):
+class AudioLDMPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-audio generation using AudioLDM.
@@ -96,22 +96,6 @@ def __init__(
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
def _encode_prompt(
self,
prompt,
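
AudioLDM only ever duplicated the VAE-slicing pair, which matters when decoding a batch of spectrograms. A usage sketch, assuming a loaded `AudioLDMPipeline` as `pipe`:

    pipe.enable_vae_slicing()  # decode the batch one latent at a time
    audios = pipe(["rain on a tin roof"] * 8, num_inference_steps=10).audios
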
diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
index dc6df780005e..1622657ed161 100644
--- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
+++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -173,22 +173,6 @@ def __init__(
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
def enable_model_cpu_offload(self, gpu_id=0):
r"""
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
index b186ec5cab2f..31b9107c7d2e 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -137,7 +137,12 @@ def retrieve_timesteps(
class StableDiffusionControlNetPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance.
@@ -233,39 +238,6 @@ def __init__(
)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -824,34 +796,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
index 10fc4384de29..a3cc0cf3108a 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -130,7 +130,12 @@ def prepare_image(image):
class StableDiffusionControlNetImg2ImgPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for image-to-image generation using Stable Diffusion with ControlNet guidance.
@@ -226,39 +231,6 @@ def __init__(
)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -866,34 +838,6 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@property
def guidance_scale(self):
return self._guidance_scale
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
index 35a4ae67c9be..fcce6f88a3e6 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -241,7 +241,12 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image=False
class StableDiffusionControlNetInpaintPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for image inpainting using Stable Diffusion with ControlNet guidance.
@@ -351,39 +356,6 @@ def __init__(
)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -1076,34 +1048,6 @@ def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
return image_latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@property
def guidance_scale(self):
return self._guidance_scale
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
index f6308f0c324d..212fba1089ca 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -42,7 +42,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from .multicontrolnet import MultiControlNetModel
@@ -140,7 +140,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetInpaintPipeline(
- DiffusionPipeline, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
@@ -229,39 +229,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -1021,34 +988,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@property
def guidance_scale(self):
return self._guidance_scale
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
index 7c5a6e39abd4..02c5e3092696 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -55,7 +55,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -116,6 +116,7 @@
class StableDiffusionXLControlNetPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
@@ -222,39 +223,6 @@ def __init__(
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -873,34 +841,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
index 273297514a16..545285f4d5a9 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -54,7 +54,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -157,7 +157,7 @@ def retrieve_latents(
class StableDiffusionXLControlNetImg2ImgPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin
):
r"""
Pipeline for image-to-image generation using Stable Diffusion XL with ControlNet guidance.
@@ -271,39 +271,6 @@ def __init__(
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -1030,34 +997,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@property
def guidance_scale(self):
return self._guidance_scale
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
index 1d377dd97855..47c9cfdf19f7 100644
--- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
+++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
@@ -23,7 +23,6 @@
from ....image_processor import PipelineImageInput, VaeImageProcessor
from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ....models.attention_processor import FusedAttnProcessor2_0
from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import KarrasDiffusionSchedulers
from ....utils import (
@@ -35,7 +34,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline
+from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
from .pipeline_output import AltDiffusionPipelineOutput
@@ -120,7 +119,12 @@ def retrieve_timesteps(
class AltDiffusionPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image generation using Alt Diffusion.
@@ -252,35 +256,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def _encode_prompt(
self,
prompt,
@@ -629,91 +604,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Alt Diffusion v1, v2, and Alt Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
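
The fused-QKV helpers removed here follow the same pattern and are likewise inherited
from the mixin. A rough sketch of the intended call sequence (experimental API; the
checkpoint id is illustrative):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    # Fuse the query/key/value projections in the UNet and the VAE; attention
    # then runs through FusedAttnProcessor2_0 (requires an AutoencoderKL VAE).
    pipe.fuse_qkv_projections(unet=True, vae=True)
    image = pipe("a photo of a cat").images[0]

    # Undo the fusion; a warning is logged if fusion was never enabled.
    pipe.unfuse_qkv_projections(unet=True, vae=True)
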
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
index a9f058bb240b..14f65f0034a2 100644
--- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
+++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -25,7 +25,6 @@
from ....image_processor import PipelineImageInput, VaeImageProcessor
from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ....models.attention_processor import FusedAttnProcessor2_0
from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import KarrasDiffusionSchedulers
from ....utils import (
@@ -38,7 +37,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline
+from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
from .pipeline_output import AltDiffusionPipelineOutput
@@ -160,7 +159,12 @@ def retrieve_timesteps(
class AltDiffusionImg2ImgPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-guided image-to-image generation using Alt Diffusion.
@@ -689,91 +693,6 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
return latents
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Alt Diffusion v1, v2, and Alt Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
index e61c35f9c504..4d3415ca8139 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
@@ -26,7 +26,7 @@
from ....schedulers.scheduling_utils import SchedulerMixin
from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline
+from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -36,7 +36,9 @@
AUGS_CONST = ["A photo of ", "An image of ", "A picture of "]
-class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionModelEditingPipeline(
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-to-image model editing.
@@ -153,22 +155,6 @@ def append_ca(net_):
self.projection_matrices = self.projection_matrices + [l.to_k for l in self.ca_clip_layers]
self.og_matrices = self.og_matrices + [copy.deepcopy(l.to_k) for l in self.ca_clip_layers]
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
index a37f2870cb02..88b9498c9ab0 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
@@ -32,7 +32,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline
+from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -63,7 +63,7 @@
class StableDiffusionParadigmsPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using a parallelized version of Stable Diffusion.
@@ -146,39 +146,6 @@ def __init__(
# attribute to wrap the unet with torch.nn.DataParallel when running multiple denoising steps on multiple GPUs
self.wrapped_unet = self.unet
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
index acaeab1c6f50..744ca3d19675 100644
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -129,7 +129,12 @@ def retrieve_timesteps(
class LatentConsistencyModelImg2ImgPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for image-to-image generation using a latent consistency model.
@@ -209,67 +214,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
def encode_prompt(
self,
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
index 469305f248e7..395e4575942d 100644
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -107,7 +107,12 @@ def retrieve_timesteps(
class LatentConsistencyModelPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image generation using a latent consistency model.
@@ -193,67 +198,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
def encode_prompt(
self,
diff --git a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
index 69bd0521d558..7c9617a3e572 100644
--- a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
+++ b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
@@ -36,7 +36,7 @@
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
+from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin
if is_librosa_available():
@@ -64,7 +64,7 @@
"""
-class MusicLDMPipeline(DiffusionPipeline):
+class MusicLDMPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-audio generation using MusicLDM.
@@ -113,22 +113,6 @@ def __init__(
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
def _encode_prompt(
self,
prompt,
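
MusicLDM previously carried only the slicing pair; through the mixin it now inherits the
full helper set. A sketch for the audio case (the repository id and call arguments are
illustrative):

    import torch
    from diffusers import MusicLDMPipeline

    pipe = MusicLDMPipeline.from_pretrained(
        "ucsd-reach/musicldm", torch_dtype=torch.float16
    ).to("cuda")

    pipe.enable_vae_slicing()  # decode spectrogram latents one item at a time
    audio = pipe(
        "techno beat with heavy bass", num_inference_steps=20, audio_length_in_s=5.0
    ).audios[0]
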
diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
index 8effa94849c9..85c533acaddf 100644
--- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
+++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
@@ -25,7 +25,7 @@
from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .image_encoder import PaintByExampleImageEncoder
@@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask):
return mask, masked_image
-class PaintByExamplePipeline(DiffusionPipeline):
+class PaintByExamplePipeline(DiffusionPipeline, EfficiencyMixin):
r"""
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index c2947e3f8dfe..36fe8d779d27 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -2097,10 +2097,12 @@ def set_attention_slice(self, slice_size: Optional[int]):
for module in modules:
module.set_attention_slice(slice_size)
+
class EfficiencyMixin:
r"""
     Helper methods for a DiffusionPipeline that has a VAE and a UNet (mainly for Stable Diffusion).
"""
+
def enable_vae_slicing(self):
r"""
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
@@ -2213,4 +2215,4 @@ def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
else:
self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
\ No newline at end of file
+ self.fusing_vae = False
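
The hunk above only adds spacing and the missing trailing newline; behaviorally the
mixin's memory helpers match the per-pipeline copies removed throughout this patch. A
short sketch of the two VAE modes (checkpoint id, batch size, and resolution are
illustrative):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    # Slicing: decode a latent batch one image at a time to cap peak memory.
    pipe.enable_vae_slicing()
    images = pipe(["a boat on a lake"] * 8).images

    # Tiling: decode/encode in overlapping tiles, useful at large resolutions.
    pipe.enable_vae_tiling()
    big = pipe("a city skyline at night", height=1024, width=1024).images[0]

    pipe.disable_vae_slicing()
    pipe.disable_vae_tiling()
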
diff --git a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
index a1cb3f5af378..6513adf5f67d 100644
--- a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
+++ b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
@@ -11,14 +11,14 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import SemanticStableDiffusionPipelineOutput
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SemanticStableDiffusionPipeline(DiffusionPipeline):
+class SemanticStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with latent editing.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 5249e897ce85..c421f9d9d3f3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -115,7 +115,7 @@ def retrieve_timesteps(
class StableDiffusionPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
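
Since the pipeline now inherits rather than redefines these methods, a quick invariant
worth checking in review is that the public surface is unchanged (sketch):

    from diffusers import EfficiencyMixin, StableDiffusionPipeline

    assert issubclass(StableDiffusionPipeline, EfficiencyMixin)
    for name in (
        "enable_vae_slicing", "disable_vae_slicing",
        "enable_vae_tiling", "disable_vae_tiling",
        "enable_freeu", "disable_freeu",
        "fuse_qkv_projections", "unfuse_qkv_projections",
    ):
        assert hasattr(StableDiffusionPipeline, name), name
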
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
index fa797a7d9f3a..687dab69455d 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
@@ -26,7 +26,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -34,7 +34,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionImageVariationPipeline(DiffusionPipeline):
+class StableDiffusionImageVariationPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline to generate image variations from an input image using Stable Diffusion.
@@ -240,34 +240,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
def __call__(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
index 2746c6ad43ea..392de8e3c036 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -25,7 +25,6 @@
from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.attention_processor import FusedAttnProcessor2_0
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import (
@@ -38,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -156,7 +155,12 @@ def retrieve_timesteps(
class StableDiffusionImg2ImgPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-guided image-to-image generation using Stable Diffusion.
@@ -768,95 +772,6 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index a8031b0a91c2..73b19d358917 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -25,12 +25,11 @@
from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AsymmetricAutoencoderKL, AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.attention_processor import FusedAttnProcessor2_0
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -220,7 +219,12 @@ def retrieve_timesteps(
class StableDiffusionInpaintPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-guided image inpainting using Stable Diffusion.
@@ -910,95 +914,6 @@ def get_timesteps(self, num_inference_steps, strength, device):
return timesteps, num_inference_steps - t_start
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
index f4bb8267aac7..6a522cafd19f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
@@ -26,7 +26,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import PIL_INTERPOLATION, deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -73,7 +73,7 @@ def retrieve_latents(
class StableDiffusionInstructPix2PixPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
):
r"""
Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion).
@@ -807,34 +807,6 @@ def prepare_image_latents(
return image_latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@property
def guidance_scale(self):
return self._guidance_scale
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
index 8d272fa5748c..2712b17901a3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
@@ -27,7 +27,7 @@
from ...schedulers import EulerDiscreteScheduler
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -60,7 +60,7 @@ def preprocess(image):
return image
-class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixin):
+class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin):
r"""
Pipeline for upscaling Stable Diffusion output image resolution by a factor of 2.
@@ -258,34 +258,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
def __call__(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index f2b77a6d17b9..be26d67322bc 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -34,7 +34,7 @@
from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import StableDiffusionPipelineOutput
@@ -68,7 +68,7 @@ def preprocess(image):
class StableDiffusionUpscalePipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-guided image super-resolution using Stable Diffusion 2.
@@ -530,34 +530,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
def __call__(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
index 8b66fa0f1972..23d9e382ac01 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput
from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -58,7 +58,7 @@
"""
-class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableUnCLIPPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
"""
Pipeline for text-to-image generation using stable unCLIP.
@@ -155,22 +155,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder
def _encode_prior_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
index feb482fb429c..0a3a3c56c6f4 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput
from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -69,7 +69,7 @@
"""
-class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
"""
Pipeline for text-guided image-to-image generation using stable unCLIP.
@@ -156,22 +156,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
index a6e593282996..b267d88e67e0 100644
--- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
+++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -170,7 +170,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
return hidden_states
-class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversionLoaderMixin):
+class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite.
@@ -246,22 +246,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
index a6724e44334f..ee6b5a0ac739 100644
--- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
+++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
@@ -39,7 +39,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -235,7 +235,9 @@ def preprocess_mask(mask, batch_size: int = 1):
return mask
-class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionDiffEditPipeline(
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+):
r"""
@@ -371,39 +373,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
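The VAE slicing and tiling helpers removed from StableDiffusionDiffEditPipeline are likewise inherited from the mixin now. A minimal sketch of toggling them, following the documented DiffEdit setup; the model id is illustrative:

    import torch
    from diffusers import DDIMInverseScheduler, DDIMScheduler, StableDiffusionDiffEditPipeline

    pipe = StableDiffusionDiffEditPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16  # illustrative checkpoint
    )
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
    pipe.inverse_scheduler = DDIMInverseScheduler.from_config(pipe.scheduler.config)
    pipe.enable_vae_slicing()  # decode the batch one slice at a time to save memory
    pipe.enable_vae_tiling()   # decode/encode in tiles so large images fit in memory
    # ... compute mask, invert, generate ...
    pipe.disable_vae_tiling()
    pipe.disable_vae_slicing()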
diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
index 138e002bf0eb..76c2c23a3f2b 100644
--- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
+++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -99,7 +99,7 @@
"""
-class StableDiffusionGLIGENPipeline(DiffusionPipeline):
+class StableDiffusionGLIGENPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
@@ -172,35 +172,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
index 6bd67a06cbbd..404f681c3a32 100644
--- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
+++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
@@ -34,7 +34,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.clip_image_project_model import CLIPImageProjection
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -145,7 +145,7 @@
"""
-class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
+class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
@@ -230,35 +230,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
def encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
index 602deeef194f..6171a22ddd70 100755
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
@@ -26,7 +26,7 @@
from ...schedulers import LMSDiscreteScheduler
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
@@ -47,7 +47,7 @@ def apply_model(self, *args, **kwargs):
return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample
-class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionKDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
index 8b83c9aec43a..2742b8797c8f 100644
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
@@ -50,7 +50,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -91,6 +91,7 @@ def apply_model(self, *args, **kwargs):
class StableDiffusionXLKDiffusionPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
@@ -196,39 +197,6 @@ def set_scheduler(self, scheduler_type: str):
raise ValueError(f"Invalid scheduler type {scheduler_type}. Please choose one of {valid_samplers}.")
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -582,94 +550,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
@property
def guidance_scale(self):
return self._guidance_scale
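The experimental fuse_qkv_projections/unfuse_qkv_projections pair removed here also lives on the mixin, so the call pattern on SDXL pipelines is unchanged. A sketch, assuming an SDXL checkpoint and using StableDiffusionXLPipeline (converted to the mixin further below):

    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16  # illustrative
    ).to("cuda")
    pipe.fuse_qkv_projections()    # fuse q/k/v (self-attn) and k/v (cross-attn) projections
    image = pipe("an astronaut riding a horse").images[0]
    pipe.unfuse_qkv_projections()  # restore the unfused attention processors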
diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
index 6553e9786488..8f517e3d035c 100644
--- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
+++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -82,7 +82,12 @@ class LDM3DPipelineOutput(BaseOutput):
class StableDiffusionLDM3DPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image and 3D generation using LDM3D.
@@ -165,39 +170,6 @@ def __init__(
self.image_processor = VaeImageProcessorLDM3D(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
index 51e6f47b83b6..29cde4ef328b 100644
--- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
+++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
@@ -32,7 +32,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -59,7 +59,9 @@
"""
-class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin):
+class StableDiffusionPanoramaPipeline(
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+):
r"""
Pipeline for text-to-image generation using MultiDiffusion.
@@ -140,22 +142,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
index d72698cdc6a3..936cd5964666 100644
--- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
+++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
@@ -14,7 +14,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import StableDiffusionSafePipelineOutput
from .safety_checker import SafeStableDiffusionSafetyChecker
@@ -22,7 +22,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionPipelineSafe(DiffusionPipeline, IPAdapterMixin):
+class StableDiffusionPipelineSafe(DiffusionPipeline, EfficiencyMixin, IPAdapterMixin):
r"""
Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
index 435bbca4d7d2..95a4215e6710 100644
--- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
+++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
@@ -33,7 +33,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -98,7 +98,7 @@ def __call__(
# Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input
-class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin):
+class StableDiffusionSAGPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
@@ -161,22 +161,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
index e90fe6571f63..508c09a42fbb 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
@@ -52,7 +52,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -148,6 +148,7 @@ def retrieve_timesteps(
class StableDiffusionXLPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
@@ -257,39 +258,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def encode_prompt(
self,
prompt: str,
@@ -744,93 +712,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
index a6ed0768eb1b..8c71019b7647 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
@@ -35,7 +35,6 @@
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.attention_processor import (
AttnProcessor2_0,
- FusedAttnProcessor2_0,
LoRAAttnProcessor2_0,
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
@@ -53,7 +52,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -166,6 +165,7 @@ def retrieve_timesteps(
class StableDiffusionXLImg2ImgPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
TextualInversionLoaderMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
@@ -278,39 +278,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -879,95 +846,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
index f44d53fffaba..f45ebd273f52 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@@ -36,7 +36,6 @@
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.attention_processor import (
AttnProcessor2_0,
- FusedAttnProcessor2_0,
LoRAAttnProcessor2_0,
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
@@ -54,7 +53,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -311,6 +310,7 @@ def retrieve_timesteps(
class StableDiffusionXLInpaintPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
FromSingleFileMixin,
@@ -429,39 +429,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
dtype = next(self.image_encoder.parameters()).dtype
@@ -1115,95 +1082,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
index 2e4225cf6145..280e75035f1e 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
@@ -41,7 +41,7 @@
scale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -118,7 +118,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLInstructPix2PixPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ FromSingleFileMixin,
+ StableDiffusionXLLoraLoaderMixin,
):
r"""
Pipeline for pixel-level image editing by following text instructions. Based on Stable Diffusion XL.
@@ -205,38 +209,6 @@ def __init__(
else:
self.watermark = None
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several
- steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding.
-
- When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in
- several steps. This is useful to save a large amount of memory and to allow the processing of larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def encode_prompt(
self,
prompt: str,
@@ -621,34 +593,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
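Call sites do not change after this refactor: the helpers the hunk above deletes are now inherited from EfficiencyMixin. A hedged usage sketch (device and checkpoint id are illustrative):

    import torch
    from diffusers import StableDiffusionXLInstructPix2PixPipeline

    pipe = StableDiffusionXLInstructPix2PixPipeline.from_pretrained(
        "diffusers/sdxl-instructpix2pix-768", torch_dtype=torch.float16  # illustrative checkpoint
    ).to("cuda")
    pipe.enable_vae_slicing()  # now provided by EfficiencyMixin
    pipe.enable_vae_tiling()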
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
index f5d3b66f326b..271082df4c4e 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -163,7 +163,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
-class StableDiffusionAdapterPipeline(DiffusionPipeline):
+class StableDiffusionAdapterPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
https://arxiv.org/abs/2302.08453
@@ -248,22 +248,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -628,34 +612,6 @@ def _default_height_width(self, height, width, image):
return height, width
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index 0c812179dac1..1b15f1ec3107 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -51,7 +51,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -181,6 +181,7 @@ def retrieve_timesteps(
class StableDiffusionXLAdapterPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
@@ -270,39 +271,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
@@ -788,34 +756,6 @@ def _default_height_width(self, height, width, image):
return height, width
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
index eb34910b7008..db0c8e54fb6f 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@@ -33,7 +33,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import TextToVideoSDPipelineOutput
@@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
return outputs
-class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class TextToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-to-video generation.
@@ -129,39 +129,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -484,34 +451,6 @@ def prepare_latents(
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
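For the video pipelines the decode step sees batch_size * num_frames latents at once, which makes the inherited slicing helper particularly useful. A sketch (checkpoint id assumed):

    import torch
    from diffusers import TextToVideoSDPipeline

    pipe = TextToVideoSDPipeline.from_pretrained(
        "damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16  # assumed checkpoint
    ).to("cuda")
    pipe.enable_vae_slicing()  # decode the frame batch one slice at a time
    frames = pipe("a panda surfing", num_frames=16).frames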
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
index 2a41d9a8f735..dcc7c6a7df0f 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from . import TextToVideoSDPipelineOutput
@@ -157,7 +157,7 @@ def preprocess_video(video):
return video
-class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class VideoToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-guided video-to-video generation.
@@ -205,39 +205,6 @@ def __init__(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
@@ -591,34 +558,6 @@ def prepare_latents(self, video, timestep, batch_size, dtype, device, generator=
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
index fc34d50a50dd..991dfeee0a55 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
@@ -17,7 +17,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from ..stable_diffusion import StableDiffusionSafetyChecker
@@ -281,7 +281,7 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
return warped_latents
-class TextToVideoZeroPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class TextToVideoZeroPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for zero-shot text-to-video generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
index 4fe2279a468a..afd81d04f3fe 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
if is_invisible_watermark_available():
@@ -327,6 +327,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class TextToVideoZeroSDXLPipeline(
DiffusionPipeline,
+ EfficiencyMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
):
@@ -436,22 +437,6 @@ def prepare_extra_step_kwargs(self, generator, eta):
extra_step_kwargs["generator"] = generator
return extra_step_kwargs
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.upcast_vae
def upcast_vae(self):
dtype = self.vae.dtype
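upcast_vae stays pipeline-local since it is SDXL-specific. Condensed, the pattern is to run the fp16-unstable VAE in float32 while returning selected decoder layers to the working dtype; a sketch, assuming an AutoencoderKL-style pipe.vae:

    import torch

    def upcast_vae(pipe):
        dtype = pipe.vae.dtype
        pipe.vae.to(dtype=torch.float32)      # run the VAE in fp32
        pipe.vae.post_quant_conv.to(dtype)    # these layers tolerate fp16,
        pipe.vae.decoder.conv_in.to(dtype)    # so keep them in the working
        pipe.vae.decoder.mid_block.to(dtype)  # dtype to save memory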
diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
index 38c12edb2d43..cad7cb381e64 100644
--- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
+++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
@@ -21,7 +21,7 @@
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.outputs import BaseOutput
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .modeling_text_decoder import UniDiffuserTextDecoder
from .modeling_uvit import UniDiffuserModel
@@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput):
text: Optional[Union[List[str], List[List[str]]]]
-class UniDiffuserPipeline(DiffusionPipeline):
+class UniDiffuserPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
image generation, image-conditioned text generation, and joint image-text generation.
@@ -135,39 +135,6 @@ def __init__(
# TODO: handle safety checking?
self.safety_checker = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
def prepare_extra_step_kwargs(self, generator, eta):
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
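prepare_extra_step_kwargs is kept out of the mixin because it touches the scheduler rather than the vae/unet; the pattern it implements is signature inspection, roughly:

    import inspect

    def prepare_extra_step_kwargs(scheduler, generator, eta):
        # pass eta/generator only to schedulers whose step() accepts them
        params = set(inspect.signature(scheduler.step).parameters)
        kwargs = {}
        if "eta" in params:
            kwargs["eta"] = eta
        if "generator" in params:
            kwargs["generator"] = generator
        return kwargs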
From 4a7fc38b7d600a1b1abb73624e770fdf780d73a5 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Tue, 16 Jan 2024 00:46:49 +0800
Subject: [PATCH 03/17] add mixin to rdm & restore audioldm2 & fix quality
checks
---
examples/research_projects/rdm/pipeline_rdm.py | 3 ++-
.../pipelines/audioldm2/pipeline_audioldm2.py | 16 ++++++++++++++++
.../pipeline_stable_diffusion.py | 7 ++++++-
.../pipeline_stable_diffusion_k_diffusion.py | 4 +++-
src/diffusers/utils/dummy_pt_objects.py | 15 +++++++++++++++
5 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py
index 5398ee2e5331..dbc98d028fad 100644
--- a/examples/research_projects/rdm/pipeline_rdm.py
+++ b/examples/research_projects/rdm/pipeline_rdm.py
@@ -19,6 +19,7 @@
UNet2DConditionModel,
)
from diffusers.image_processor import VaeImageProcessor
+from diffusers.pipelines.pipeline_utils import EfficiencyMixin
from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor
@@ -26,7 +27,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class RDMPipeline(DiffusionPipeline):
+class RDMPipeline(DiffusionPipeline, EfficiencyMixin):
r"""
Pipeline for text-to-image generation using Retrieval Augmented Diffusion.
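The rdm change doubles as the recipe for out-of-tree pipelines: inherit the mixin alongside DiffusionPipeline and register the modules it relies on. A minimal sketch (class and arguments are hypothetical):

    from diffusers import DiffusionPipeline
    from diffusers.pipelines.pipeline_utils import EfficiencyMixin

    class MyPipeline(DiffusionPipeline, EfficiencyMixin):
        def __init__(self, vae, unet, scheduler):
            super().__init__()
            # the mixin assumes self.vae / self.unet exist,
            # which register_modules provides
            self.register_modules(vae=vae, unet=unet, scheduler=scheduler)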
diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
index 1622657ed161..64f93e3eefea 100644
--- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
+++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -173,6 +173,22 @@ def __init__(
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.enable_vae_slicing
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.disable_vae_slicing
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
def enable_model_cpu_offload(self, gpu_id=0):
r"""
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index c421f9d9d3f3..860dfeeb85af 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -115,7 +115,12 @@ def retrieve_timesteps(
class StableDiffusionPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
index 6171a22ddd70..ea1beb6788c3 100755
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
@@ -47,7 +47,9 @@ def apply_model(self, *args, **kwargs):
return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample
-class StableDiffusionKDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionKDiffusionPipeline(
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py
index a4f5436038ea..19f5ac445a9e 100644
--- a/src/diffusers/utils/dummy_pt_objects.py
+++ b/src/diffusers/utils/dummy_pt_objects.py
@@ -570,6 +570,21 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
+class EfficiencyMixin(metaclass=DummyObject):
+ _backends = ["torch"]
+
+ def __init__(self, *args, **kwargs):
+ requires_backends(self, ["torch"])
+
+ @classmethod
+ def from_config(cls, *args, **kwargs):
+ requires_backends(cls, ["torch"])
+
+ @classmethod
+ def from_pretrained(cls, *args, **kwargs):
+ requires_backends(cls, ["torch"])
+
+
class ImagePipelineOutput(metaclass=DummyObject):
_backends = ["torch"]
From cc4f805b07e08b5e4a743a64c9e64ce7ed1aaf75 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Mon, 19 Feb 2024 21:42:01 +0800
Subject: [PATCH 04/17] rebase on main branch
---
examples/community/ip_adapter_face_id.py | 126 +-----------------
.../pipeline_animatediff_img2video.py | 67 +---------
.../pipeline_stable_diffusion_xl_ipex.py | 61 ---------
.../models/unets/unet_3d_condition.py | 39 ++++++
src/diffusers/models/unets/unet_i2vgen_xl.py | 38 ++++++
.../models/unets/unet_motion_model.py | 39 ++++++
.../animatediff/pipeline_animatediff.py | 2 -
.../pipeline_animatediff_video2video.py | 65 +--------
.../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 68 +---------
src/diffusers/pipelines/pia/pipeline_pia.py | 71 ++--------
.../pipeline_stable_diffusion_img2img.py | 33 -----
.../pipeline_stable_diffusion_inpaint.py | 33 -----
12 files changed, 142 insertions(+), 500 deletions(-)
diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py
index dfd6a9df6eb1..f92b91d803c5 100644
--- a/examples/community/ip_adapter_face_id.py
+++ b/examples/community/ip_adapter_face_id.py
@@ -26,9 +26,8 @@
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.models.attention_processor import FusedAttnProcessor2_0
from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -415,7 +414,12 @@ def retrieve_timesteps(
class IPAdapterFaceIDStableDiffusionPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ LoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
@@ -727,35 +731,6 @@ def set_ip_adapter_scale(self, scale):
if isinstance(attn_processor, (LoRAIPAdapterAttnProcessor, LoRAIPAdapterAttnProcessor2_0)):
attn_processor.scale = scale
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def _encode_prompt(
self,
prompt,
@@ -1080,93 +1055,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
- def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """
- Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
- key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
- """
- self.fusing_unet = False
- self.fusing_vae = False
-
- if unet:
- self.fusing_unet = True
- self.unet.fuse_qkv_projections()
- self.unet.set_attn_processor(FusedAttnProcessor2_0())
-
- if vae:
- if not isinstance(self.vae, AutoencoderKL):
- raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
-
- self.fusing_vae = True
- self.vae.fuse_qkv_projections()
- self.vae.set_attn_processor(FusedAttnProcessor2_0())
-
- # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
- def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
- """Disable QKV projection fusion if enabled.
-
-        <Tip warning={true}>
-
-        This API is 🧪 experimental.
-
-        </Tip>
-
- Args:
- unet (`bool`, defaults to `True`): To apply fusion on the UNet.
- vae (`bool`, defaults to `True`): To apply fusion on the VAE.
-
- """
- if unet:
- if not self.fusing_unet:
- logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
- else:
- self.unet.unfuse_qkv_projections()
- self.fusing_unet = False
-
- if vae:
- if not self.fusing_vae:
- logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
- else:
- self.vae.unfuse_qkv_projections()
- self.fusing_vae = False
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py
index 826742f9afc8..d666e554f07d 100644
--- a/examples/community/pipeline_animatediff_img2video.py
+++ b/examples/community/pipeline_animatediff_img2video.py
@@ -26,7 +26,7 @@
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unet_motion_model import MotionAdapter
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
@@ -230,7 +230,9 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput):
frames: Union[torch.Tensor, np.ndarray]
-class AnimateDiffImgToVideoPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
+class AnimateDiffImgToVideoPipeline(
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-to-video generation.
@@ -527,67 +529,6 @@ def decode_latents(self, latents):
video = video.float()
return video
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
def prepare_extra_step_kwargs(self, generator, eta):
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
diff --git a/examples/community/pipeline_stable_diffusion_xl_ipex.py b/examples/community/pipeline_stable_diffusion_xl_ipex.py
index c57d58bb58ba..68ad5dbec77d 100644
--- a/examples/community/pipeline_stable_diffusion_xl_ipex.py
+++ b/examples/community/pipeline_stable_diffusion_xl_ipex.py
@@ -267,39 +267,6 @@ def __init__(
else:
self.watermark = None
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def encode_prompt(
self,
prompt: str,
@@ -701,34 +668,6 @@ def upcast_vae(self):
self.vae.decoder.conv_in.to(dtype)
self.vae.decoder.mid_block.to(dtype)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
diff --git a/src/diffusers/models/unets/unet_3d_condition.py b/src/diffusers/models/unets/unet_3d_condition.py
index 1d5bd57cf8e0..b7641a96a7a1 100644
--- a/src/diffusers/models/unets/unet_3d_condition.py
+++ b/src/diffusers/models/unets/unet_3d_condition.py
@@ -27,6 +27,7 @@
from ..attention_processor import (
ADDED_KV_ATTENTION_PROCESSORS,
CROSS_ATTENTION_PROCESSORS,
+ Attention,
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
@@ -503,6 +504,44 @@ def disable_freeu(self):
if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
setattr(upsample_block, k, None)
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
+ def fuse_qkv_projections(self):
+ """
+ Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+ key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+ """
+ self.original_attn_processors = None
+
+ for _, attn_processor in self.attn_processors.items():
+ if "Added" in str(attn_processor.__class__.__name__):
+ raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
+
+ self.original_attn_processors = self.attn_processors
+
+ for module in self.modules():
+ if isinstance(module, Attention):
+ module.fuse_projections(fuse=True)
+
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
+ def unfuse_qkv_projections(self):
+ """Disables the fused QKV projection if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+ """
+ if self.original_attn_processors is not None:
+ self.set_attn_processor(self.original_attn_processors)
+
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unload_lora
def unload_lora(self):
"""Unloads LoRA weights."""
diff --git a/src/diffusers/models/unets/unet_i2vgen_xl.py b/src/diffusers/models/unets/unet_i2vgen_xl.py
index 5dce87254986..a096f842ab6c 100644
--- a/src/diffusers/models/unets/unet_i2vgen_xl.py
+++ b/src/diffusers/models/unets/unet_i2vgen_xl.py
@@ -474,6 +474,44 @@ def disable_freeu(self):
if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
setattr(upsample_block, k, None)
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
+ def fuse_qkv_projections(self):
+ """
+ Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+ key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+ """
+ self.original_attn_processors = None
+
+ for _, attn_processor in self.attn_processors.items():
+ if "Added" in str(attn_processor.__class__.__name__):
+ raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
+
+ self.original_attn_processors = self.attn_processors
+
+ for module in self.modules():
+ if isinstance(module, Attention):
+ module.fuse_projections(fuse=True)
+
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
+ def unfuse_qkv_projections(self):
+ """Disables the fused QKV projection if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+ """
+ if self.original_attn_processors is not None:
+ self.set_attn_processor(self.original_attn_processors)
+
def forward(
self,
sample: torch.FloatTensor,
diff --git a/src/diffusers/models/unets/unet_motion_model.py b/src/diffusers/models/unets/unet_motion_model.py
index 9cb0f42c85ef..ebdddf09bd63 100644
--- a/src/diffusers/models/unets/unet_motion_model.py
+++ b/src/diffusers/models/unets/unet_motion_model.py
@@ -23,6 +23,7 @@
from ..attention_processor import (
ADDED_KV_ATTENTION_PROCESSORS,
CROSS_ATTENTION_PROCESSORS,
+ Attention,
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
@@ -700,6 +701,44 @@ def disable_freeu(self) -> None:
if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
setattr(upsample_block, k, None)
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
+ def fuse_qkv_projections(self):
+ """
+ Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+ key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+ """
+ self.original_attn_processors = None
+
+ for _, attn_processor in self.attn_processors.items():
+ if "Added" in str(attn_processor.__class__.__name__):
+ raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
+
+ self.original_attn_processors = self.attn_processors
+
+ for module in self.modules():
+ if isinstance(module, Attention):
+ module.fuse_projections(fuse=True)
+
+ # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
+ def unfuse_qkv_projections(self):
+ """Disables the fused QKV projection if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+ """
+ if self.original_attn_processors is not None:
+ self.set_attn_processor(self.original_attn_processors)
+
def forward(
self,
sample: torch.FloatTensor,
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
index cbdab46cb8e4..050475a1ad26 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@@ -87,9 +87,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
class AnimateDiffPipeline(
-
DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
-
):
r"""
Pipeline for text-to-video generation.
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
index f5ada63dfdfc..599f0497da97 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -35,7 +35,7 @@
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
from .pipeline_output import AnimateDiffPipelineOutput
@@ -165,7 +165,7 @@ def retrieve_timesteps(
class AnimateDiffVideoToVideoPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
+ DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
):
r"""
Pipeline for video-to-video generation.
@@ -454,67 +454,6 @@ def decode_latents(self, latents):
video = video.float()
return video
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
def prepare_extra_step_kwargs(self, generator, eta):
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
index 5354f6643cb7..ac1f44a2f348 100644
--- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
+++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
@@ -31,7 +31,7 @@
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -103,7 +103,10 @@ class I2VGenXLPipelineOutput(BaseOutput):
frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]]
-class I2VGenXLPipeline(DiffusionPipeline):
+class I2VGenXLPipeline(
+ DiffusionPipeline,
+ EfficiencyMixin,
+):
r"""
Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/).
@@ -161,39 +164,6 @@ def guidance_scale(self):
def do_classifier_free_guidance(self):
return self._guidance_scale > 1
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def encode_prompt(
self,
prompt,
@@ -542,34 +512,6 @@ def prepare_latents(
latents = latents * self.scheduler.init_noise_sigma
return latents
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
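FreeU is likewise inherited now. The values below are the combination the official FreeU repository suggests for Stable Diffusion v1.x (and the same values used by the tests later in this series); treat them as a starting point, not a fixed recommendation:

    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

    # s1/s2 attenuate the skip features, b1/b2 amplify the backbone features
    # in the first two decoder stages of the UNet.
    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
    image = pipe("an astronaut riding a horse").images[0]

    pipe.disable_freeu()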
diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py
index 1a385ea462c6..90468b0a2127 100644
--- a/src/diffusers/pipelines/pia/pipeline_pia.py
+++ b/src/diffusers/pipelines/pia/pipeline_pia.py
@@ -46,7 +46,7 @@
)
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -211,7 +211,13 @@ class PIAPipelineOutput(BaseOutput):
class PIAPipeline(
- DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin, FreeInitMixin
+ DiffusionPipeline,
+ EfficiencyMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FromSingleFileMixin,
+ FreeInitMixin,
):
r"""
Pipeline for text-to-video generation.
@@ -500,67 +506,6 @@ def decode_latents(self, latents):
video = video.float()
return video
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
- def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
- r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
- The suffixes after the scaling factors represent the stages where they are being applied.
-
- Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
- that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
- Args:
- s1 (`float`):
- Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- s2 (`float`):
- Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
- mitigate "oversmoothing effect" in the enhanced denoising process.
- b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
- b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """
- if not hasattr(self, "unet"):
- raise ValueError("The pipeline must have `unet` for using FreeU.")
- self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
- def disable_freeu(self):
- """Disables the FreeU mechanism if enabled."""
- self.unet.disable_freeu()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
def prepare_extra_step_kwargs(self, generator, eta):
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
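Any pipeline gains this whole family of helpers by listing the mixin in its bases. A hypothetical pipeline class (the name and components are illustrative, not part of the library) would look like:

    from diffusers import DiffusionPipeline
    from diffusers.pipelines.pipeline_utils import EfficiencyMixin

    class MyVideoPipeline(DiffusionPipeline, EfficiencyMixin):
        # As long as `vae` and `unet` are registered modules, the inherited
        # enable_vae_slicing / enable_vae_tiling / enable_freeu work unchanged.
        def __init__(self, vae, unet, scheduler):
            super().__init__()
            self.register_modules(vae=vae, unet=unet, scheduler=scheduler)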
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
index 392de8e3c036..6df7d62d7c9b 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -292,39 +292,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
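Tiled decoding is the companion option for resolutions that do not fit in memory in one pass. A sketch (the resolution is only an example; faint seams can appear at tile borders, which is why numerical comparisons against non-tiled output use a loose tolerance):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    pipe.enable_vae_tiling()  # split VAE encode/decode into overlapping tiles
    image = pipe("a wide mountain panorama", height=1024, width=2048).images[0]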
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index 73b19d358917..6652aecdcb5f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -364,39 +364,6 @@ def __init__(
)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
def _encode_prompt(
self,
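Because the methods are inherited rather than copied, nothing disappears from the pipelines' public surface. A quick sanity check, assuming the corresponding class definitions elsewhere in this patch add the mixin to these pipelines:

    from diffusers import EfficiencyMixin, StableDiffusionInpaintPipeline

    # Gone from the subclass body, but still reachable through the MRO.
    assert issubclass(StableDiffusionInpaintPipeline, EfficiencyMixin)
    assert hasattr(StableDiffusionInpaintPipeline, "enable_vae_tiling")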
From fc71e97477e85c53b0f8f8cccf152561826d78b5 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Mon, 19 Feb 2024 22:23:18 +0800
Subject: [PATCH 05/17] init PipelineEfficiencyFunctionTesterMixin
---
.../test_stable_diffusion.py | 13 +-
.../test_stable_diffusion_xl.py | 44 ++----
tests/pipelines/test_pipelines_common.py | 129 ++++++++++++++++++
3 files changed, 151 insertions(+), 35 deletions(-)
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
index dcc4dadf992b..a11ca7b4a233 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
@@ -52,14 +52,23 @@
TEXT_TO_IMAGE_IMAGE_PARAMS,
TEXT_TO_IMAGE_PARAMS,
)
-from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin
+from ..test_pipelines_common import (
+ PipelineEfficiencyFunctionTesterMixin,
+ PipelineKarrasSchedulerTesterMixin,
+ PipelineLatentTesterMixin,
+ PipelineTesterMixin,
+)
enable_full_determinism()
class StableDiffusion2PipelineFastTests(
- PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase
+ PipelineEfficiencyFunctionTesterMixin,
+ PipelineLatentTesterMixin,
+ PipelineKarrasSchedulerTesterMixin,
+ PipelineTesterMixin,
+ unittest.TestCase,
):
pipeline_class = StableDiffusionPipeline
params = TEXT_TO_IMAGE_PARAMS
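Other fast-test classes can opt into the shared efficiency tests the same way. A minimal sketch, placed alongside the existing pipeline tests (the test-class name is hypothetical):

    import unittest

    from diffusers import StableDiffusionPipeline

    from ..test_pipelines_common import (
        PipelineEfficiencyFunctionTesterMixin,
        PipelineTesterMixin,
    )

    class MyStableDiffusionFastTests(
        PipelineEfficiencyFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase
    ):
        pipeline_class = StableDiffusionPipeline
        # get_dummy_components() / get_dummy_inputs() must also be provided,
        # exactly as in the existing fast-test classes.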
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
index 16ef7e3009bd..b9e01f598cde 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
@@ -49,14 +49,23 @@
TEXT_TO_IMAGE_IMAGE_PARAMS,
TEXT_TO_IMAGE_PARAMS,
)
-from ..test_pipelines_common import PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin
+from ..test_pipelines_common import (
+ PipelineEfficiencyFunctionTesterMixin,
+ PipelineLatentTesterMixin,
+ PipelineTesterMixin,
+ SDXLOptionalComponentsTesterMixin,
+)
enable_full_determinism()
class StableDiffusionXLPipelineFastTests(
- PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase
+ PipelineEfficiencyFunctionTesterMixin,
+ PipelineLatentTesterMixin,
+ PipelineTesterMixin,
+ SDXLOptionalComponentsTesterMixin,
+ unittest.TestCase,
):
pipeline_class = StableDiffusionXLPipeline
params = TEXT_TO_IMAGE_PARAMS
@@ -939,37 +948,6 @@ def test_stable_diffusion_xl_save_from_pretrained(self):
assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
- def test_stable_diffusion_xl_with_fused_qkv_projections(self):
- device = "cpu" # ensure determinism for the device-dependent torch.Generator
- components = self.get_dummy_components()
- sd_pipe = StableDiffusionXLPipeline(**components)
- sd_pipe = sd_pipe.to(device)
- sd_pipe.set_progress_bar_config(disable=None)
-
- inputs = self.get_dummy_inputs(device)
- image = sd_pipe(**inputs).images
- original_image_slice = image[0, -3:, -3:, -1]
-
- sd_pipe.fuse_qkv_projections()
- inputs = self.get_dummy_inputs(device)
- image = sd_pipe(**inputs).images
- image_slice_fused = image[0, -3:, -3:, -1]
-
- sd_pipe.unfuse_qkv_projections()
- inputs = self.get_dummy_inputs(device)
- image = sd_pipe(**inputs).images
- image_slice_disabled = image[0, -3:, -3:, -1]
-
- assert np.allclose(
- original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2
- ), "Fusion of QKV projections shouldn't affect the outputs."
- assert np.allclose(
- image_slice_fused, image_slice_disabled, atol=1e-2, rtol=1e-2
- ), "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."
- assert np.allclose(
- original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
- ), "Original outputs should match when fused QKV projections are disabled."
-
def test_pipeline_interrupt(self):
components = self.get_dummy_components()
sd_pipe = StableDiffusionXLPipeline(**components)
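The SDXL-specific QKV-fusion test deleted above is subsumed by test_fused_qkv_projections in the new mixin. The API under test, sketched on a real pipeline (model id illustrative):

    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")

    # Fuse the q/k/v projections so attention runs as one matmul, then undo it;
    # outputs should match within a small tolerance either way.
    pipe.fuse_qkv_projections()
    image = pipe("a robot painting a mural").images[0]
    pipe.unfuse_qkv_projections()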
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 7f51847caf07..58929296e34d 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -59,6 +59,135 @@ def check_same_shape(tensor_list):
return all(shape == shapes[0] for shape in shapes[1:])
+class PipelineEfficiencyFunctionTesterMixin:
+ """
+ This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
+    It provides a set of common tests for PyTorch pipelines that inherit from EfficiencyMixin, such as vae_slicing, vae_tiling, and freeu.
+ """
+
+ def test_vae_slicing(self):
+ device = "cpu" # ensure determinism for the device-dependent torch.Generator
+ components = self.get_dummy_components()
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(device)
+ pipe.set_progress_bar_config(disable=None)
+
+ image_count = 4
+
+ inputs = self.get_dummy_inputs(device)
+ inputs["prompt"] = [inputs["prompt"]] * image_count
+ output_1 = pipe(**inputs)
+
+ # make sure sliced vae decode yields the same result
+ pipe.enable_vae_slicing()
+ inputs = self.get_dummy_inputs(device)
+ inputs["prompt"] = [inputs["prompt"]] * image_count
+ output_2 = pipe(**inputs)
+
+ # there is a small discrepancy at image borders vs. full batch decode
+ assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 3e-3
+
+ def test_vae_tiling(self):
+ device = "cpu" # ensure determinism for the device-dependent torch.Generator
+ components = self.get_dummy_components()
+
+        # the safety checker is not needed for this numerical comparison
+ components["safety_checker"] = None
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(device)
+ pipe.set_progress_bar_config(disable=None)
+
+ prompt = "A painting of a squirrel eating a burger"
+
+        # Test that tiled decode yields (nearly) the same result as the non-tiled decode
+ generator = torch.Generator(device=device).manual_seed(0)
+ output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
+
+ # make sure tiled vae decode yields the same result
+ pipe.enable_vae_tiling()
+ generator = torch.Generator(device=device).manual_seed(0)
+ output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
+
+ assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 5e-1
+
+ # test that tiled decode works with various shapes
+ shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)]
+ for shape in shapes:
+ zeros = torch.zeros(shape).to(device)
+ pipe.vae.decode(zeros)
+
+ def test_freeu_enabled(self):
+ components = self.get_dummy_components()
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(torch_device)
+ pipe.set_progress_bar_config(disable=None)
+
+ prompt = "hey"
+ output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+ pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
+ output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+ assert not np.allclose(
+ output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1]
+ ), "Enabling of FreeU should lead to different results."
+
+ def test_freeu_disabled(self):
+ components = self.get_dummy_components()
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(torch_device)
+ pipe.set_progress_bar_config(disable=None)
+
+ prompt = "hey"
+ output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+ pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
+ pipe.disable_freeu()
+
+ freeu_keys = {"s1", "s2", "b1", "b2"}
+ for upsample_block in pipe.unet.up_blocks:
+ for key in freeu_keys:
+ assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None."
+
+ output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+ assert np.allclose(
+ output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]
+ ), "Disabling of FreeU should lead to results similar to the default pipeline results."
+
+ def test_fused_qkv_projections(self):
+ device = "cpu" # ensure determinism for the device-dependent torch.Generator
+ components = self.get_dummy_components()
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(device)
+ pipe.set_progress_bar_config(disable=None)
+
+ inputs = self.get_dummy_inputs(device)
+ image = pipe(**inputs).images
+ original_image_slice = image[0, -3:, -3:, -1]
+
+ pipe.fuse_qkv_projections()
+ inputs = self.get_dummy_inputs(device)
+ image = pipe(**inputs).images
+ image_slice_fused = image[0, -3:, -3:, -1]
+
+ pipe.unfuse_qkv_projections()
+ inputs = self.get_dummy_inputs(device)
+ image = pipe(**inputs).images
+ image_slice_disabled = image[0, -3:, -3:, -1]
+
+ assert np.allclose(
+ original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2
+ ), "Fusion of QKV projections shouldn't affect the outputs."
+ assert np.allclose(
+ image_slice_fused, image_slice_disabled, atol=1e-2, rtol=1e-2
+ ), "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."
+ assert np.allclose(
+ original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
+ ), "Original outputs should match when fused QKV projections are disabled."
+
+
class PipelineLatentTesterMixin:
"""
This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
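To run just the updated fast tests for one pipeline, standard unittest discovery from the repository root is enough; a sketch:

    import unittest

    # Collects the stable_diffusion_2 fast tests, which now include the shared
    # vae_slicing / vae_tiling / freeu / fused-qkv checks from the mixin.
    suite = unittest.defaultTestLoader.discover(
        "tests/pipelines/stable_diffusion_2", pattern="test_stable_diffusion.py"
    )
    unittest.TextTestRunner(verbosity=2).run(suite)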
From 95f53e685eb4930e281a338dffcae7ab3db1f262 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Wed, 21 Feb 2024 10:46:09 +0800
Subject: [PATCH 06/17] rename EfficiencyMixin to LatentDiffusionMixin
---
examples/community/composable_stable_diffusion.py | 4 ++--
examples/community/gluegen.py | 4 ++--
examples/community/ip_adapter_face_id.py | 4 ++--
examples/community/latent_consistency_interpolate.py | 4 ++--
examples/community/llm_grounded_diffusion.py | 4 ++--
examples/community/lpw_stable_diffusion.py | 4 ++--
examples/community/lpw_stable_diffusion_xl.py | 4 ++--
examples/community/pipeline_animatediff_controlnet.py | 4 ++--
examples/community/pipeline_animatediff_img2video.py | 4 ++--
examples/community/pipeline_demofusion_sdxl.py | 4 ++--
examples/community/pipeline_sdxl_style_aligned.py | 6 +++---
.../pipeline_stable_diffusion_xl_controlnet_adapter.py | 4 ++--
...ine_stable_diffusion_xl_controlnet_adapter_inpaint.py | 4 ++--
examples/community/pipeline_zero1to3.py | 6 +++---
examples/community/sd_text2img_k_diffusion.py | 4 ++--
examples/community/seed_resize_stable_diffusion.py | 4 ++--
examples/community/speech_to_image_diffusion.py | 4 ++--
examples/community/stable_diffusion_comparison.py | 4 ++--
examples/community/stable_diffusion_ipex.py | 6 ++++--
examples/community/stable_diffusion_mega.py | 4 ++--
examples/community/stable_diffusion_repaint.py | 6 ++++--
examples/community/text_inpainting.py | 4 ++--
.../controlnetxs/pipeline_controlnet_xs.py | 4 ++--
.../controlnetxs/pipeline_controlnet_xs_sd_xl.py | 4 ++--
examples/research_projects/rdm/pipeline_rdm.py | 4 ++--
src/diffusers/__init__.py | 4 ++--
src/diffusers/pipelines/__init__.py | 4 ++--
.../pipelines/animatediff/pipeline_animatediff.py | 9 +++++++--
.../animatediff/pipeline_animatediff_video2video.py | 9 +++++++--
src/diffusers/pipelines/audioldm/pipeline_audioldm.py | 4 ++--
src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py | 4 ++--
.../pipelines/controlnet/pipeline_controlnet.py | 4 ++--
.../pipelines/controlnet/pipeline_controlnet_img2img.py | 4 ++--
.../pipelines/controlnet/pipeline_controlnet_inpaint.py | 4 ++--
.../controlnet/pipeline_controlnet_inpaint_sd_xl.py | 4 ++--
.../pipelines/controlnet/pipeline_controlnet_sd_xl.py | 4 ++--
.../controlnet/pipeline_controlnet_sd_xl_img2img.py | 8 ++++++--
.../deprecated/alt_diffusion/pipeline_alt_diffusion.py | 4 ++--
.../alt_diffusion/pipeline_alt_diffusion_img2img.py | 4 ++--
.../pipeline_stable_diffusion_model_editing.py | 4 ++--
.../pipeline_stable_diffusion_paradigms.py | 4 ++--
src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 4 ++--
.../pipeline_latent_consistency_img2img.py | 4 ++--
.../pipeline_latent_consistency_text2img.py | 4 ++--
src/diffusers/pipelines/musicldm/pipeline_musicldm.py | 4 ++--
.../paint_by_example/pipeline_paint_by_example.py | 4 ++--
src/diffusers/pipelines/pia/pipeline_pia.py | 4 ++--
src/diffusers/pipelines/pipeline_utils.py | 4 ++--
.../pipeline_semantic_stable_diffusion.py | 4 ++--
.../stable_diffusion/pipeline_stable_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_image_variation.py | 4 ++--
.../pipeline_stable_diffusion_img2img.py | 4 ++--
.../pipeline_stable_diffusion_inpaint.py | 4 ++--
.../pipeline_stable_diffusion_instruct_pix2pix.py | 4 ++--
.../pipeline_stable_diffusion_latent_upscale.py | 4 ++--
.../pipeline_stable_diffusion_upscale.py | 4 ++--
.../pipelines/stable_diffusion/pipeline_stable_unclip.py | 4 ++--
.../stable_diffusion/pipeline_stable_unclip_img2img.py | 6 ++++--
.../pipeline_stable_diffusion_attend_and_excite.py | 4 ++--
.../pipeline_stable_diffusion_diffedit.py | 4 ++--
.../pipeline_stable_diffusion_gligen.py | 4 ++--
.../pipeline_stable_diffusion_gligen_text_image.py | 4 ++--
.../pipeline_stable_diffusion_k_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_xl_k_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_ldm3d.py | 4 ++--
.../pipeline_stable_diffusion_panorama.py | 4 ++--
.../pipeline_stable_diffusion_safe.py | 4 ++--
.../pipeline_stable_diffusion_sag.py | 4 ++--
.../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 4 ++--
.../pipeline_stable_diffusion_xl_img2img.py | 4 ++--
.../pipeline_stable_diffusion_xl_inpaint.py | 4 ++--
.../pipeline_stable_diffusion_xl_instruct_pix2pix.py | 4 ++--
.../t2i_adapter/pipeline_stable_diffusion_adapter.py | 4 ++--
.../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 4 ++--
.../pipeline_text_to_video_synth.py | 4 ++--
.../pipeline_text_to_video_synth_img2img.py | 4 ++--
.../pipeline_text_to_video_zero.py | 4 ++--
.../pipeline_text_to_video_zero_sdxl.py | 4 ++--
.../pipelines/unidiffuser/pipeline_unidiffuser.py | 4 ++--
src/diffusers/utils/dummy_pt_objects.py | 2 +-
.../stable_diffusion_2/test_stable_diffusion.py | 4 ++--
.../stable_diffusion_xl/test_stable_diffusion_xl.py | 4 ++--
tests/pipelines/test_pipelines_common.py | 4 ++--
83 files changed, 187 insertions(+), 167 deletions(-)
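For any downstream code written against patch 01, the rename is a one-line import change; a before/after sketch (the pipeline class is hypothetical):

    # before (patch 01):
    #   from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin

    # after (this patch):
    from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin

    class MyPipeline(DiffusionPipeline, LatentDiffusionMixin):
        pass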
diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py
index eb099f9398b9..c7b91f94f294 100644
--- a/examples/community/composable_stable_diffusion.py
+++ b/examples/community/composable_stable_diffusion.py
@@ -22,7 +22,7 @@
from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import (
@@ -39,7 +39,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class ComposableStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
+class ComposableStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/gluegen.py b/examples/community/gluegen.py
index 19cbf6cb3b82..f0ace91d683c 100644
--- a/examples/community/gluegen.py
+++ b/examples/community/gluegen.py
@@ -10,7 +10,7 @@
from diffusers.loaders import LoraLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -194,7 +194,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
-class GlueGenStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, LoraLoaderMixin):
+class GlueGenStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin, LoraLoaderMixin):
def __init__(
self,
vae: AutoencoderKL,
diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py
index f92b91d803c5..d1fa98bc9df9 100644
--- a/examples/community/ip_adapter_face_id.py
+++ b/examples/community/ip_adapter_face_id.py
@@ -27,7 +27,7 @@
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -415,7 +415,7 @@ def retrieve_timesteps(
class IPAdapterFaceIDStableDiffusionPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/examples/community/latent_consistency_interpolate.py b/examples/community/latent_consistency_interpolate.py
index a70fb6a467f5..44a7c8bec07a 100644
--- a/examples/community/latent_consistency_interpolate.py
+++ b/examples/community/latent_consistency_interpolate.py
@@ -9,7 +9,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import LCMScheduler
from diffusers.utils import (
@@ -190,7 +190,7 @@ def slerp(
class LatentConsistencyModelWalkPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using a latent consistency model.
diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py
index 39d530e09b1c..7f7b0fad39f8 100644
--- a/examples/community/llm_grounded_diffusion.py
+++ b/examples/community/llm_grounded_diffusion.py
@@ -35,7 +35,7 @@
from diffusers.models.attention_processor import AttnProcessor2_0
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines import DiffusionPipeline
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -269,7 +269,7 @@ def __call__(
class LLMGroundedDiffusionPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py
index debaef7d4642..819787ee9a02 100644
--- a/examples/community/lpw_stable_diffusion.py
+++ b/examples/community/lpw_stable_diffusion.py
@@ -13,7 +13,7 @@
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -409,7 +409,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8):
class StableDiffusionLongPromptWeightingPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion without tokens length limit, and support parsing
diff --git a/examples/community/lpw_stable_diffusion_xl.py b/examples/community/lpw_stable_diffusion_xl.py
index 83f0e8b7818d..c5c93d9ea381 100644
--- a/examples/community/lpw_stable_diffusion_xl.py
+++ b/examples/community/lpw_stable_diffusion_xl.py
@@ -30,7 +30,7 @@
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
)
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -546,7 +546,7 @@ def retrieve_timesteps(
class SDXLLongPromptWeightingPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
FromSingleFileMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py
index dc7b6302c5ea..a1d6c5605102 100644
--- a/examples/community/pipeline_animatediff_controlnet.py
+++ b/examples/community/pipeline_animatediff_controlnet.py
@@ -28,7 +28,7 @@
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unets.unet_motion_model import MotionAdapter
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
@@ -112,7 +112,7 @@ class AnimateDiffControlNetPipelineOutput(BaseOutput):
class AnimateDiffControlNetPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-video generation.
diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py
index d666e554f07d..d691ece93050 100644
--- a/examples/community/pipeline_animatediff_img2video.py
+++ b/examples/community/pipeline_animatediff_img2video.py
@@ -26,7 +26,7 @@
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unet_motion_model import MotionAdapter
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
@@ -231,7 +231,7 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput):
class AnimateDiffImgToVideoPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-video generation.
diff --git a/examples/community/pipeline_demofusion_sdxl.py b/examples/community/pipeline_demofusion_sdxl.py
index 69624271beed..e7dc269a1e71 100644
--- a/examples/community/pipeline_demofusion_sdxl.py
+++ b/examples/community/pipeline_demofusion_sdxl.py
@@ -23,7 +23,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
is_accelerate_available,
@@ -94,7 +94,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class DemoFusionSDXLPipeline(
- DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py
index 2b6047e97bfb..b547c35f1123 100644
--- a/examples/community/pipeline_sdxl_style_aligned.py
+++ b/examples/community/pipeline_sdxl_style_aligned.py
@@ -51,7 +51,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -85,7 +85,7 @@
>>> from typing import List
>>> import torch
- >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,EfficiencyMixin
+        >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
>>> from PIL import Image
>>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo"
@@ -389,7 +389,7 @@ def retrieve_latents(
class StyleAlignedSDXLPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
index 490fde58b916..49a46e9ba4be 100644
--- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
+++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
@@ -33,7 +33,7 @@
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -159,7 +159,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetAdapterPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
index 17777090df24..5347ab949697 100644
--- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
+++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
@@ -52,7 +52,7 @@
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -305,7 +305,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetAdapterInpaintPipeline(
- DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py
index 6e1c1d015e48..1656ae674382 100644
--- a/examples/community/pipeline_zero1to3.py
+++ b/examples/community/pipeline_zero1to3.py
@@ -22,10 +22,10 @@
# randn_tensor,
# replace_example_docstring,
# )
-# from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+# from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
# from . import StableDiffusionPipelineOutput
# from .safety_checker import StableDiffusionSafetyChecker
-from diffusers import AutoencoderKL, DiffusionPipeline, EfficiencyMixin, UNet2DConditionModel
+from diffusers import AutoencoderKL, DiffusionPipeline, LatentDiffusionMixin, UNet2DConditionModel
from diffusers.configuration_utils import ConfigMixin, FrozenDict
from diffusers.models.modeling_utils import ModelMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -66,7 +66,7 @@ def forward(self, x):
return self.projection(x)
-class Zero1to3StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
+class Zero1to3StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for single view conditioned novel view generation using Zero1to3.
diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py
index 8928eb383b76..3ca36872830e 100755
--- a/examples/community/sd_text2img_k_diffusion.py
+++ b/examples/community/sd_text2img_k_diffusion.py
@@ -19,7 +19,7 @@
import torch
from k_diffusion.external import CompVisDenoiser, CompVisVDenoiser
-from diffusers import DiffusionPipeline, EfficiencyMixin, LMSDiscreteScheduler
+from diffusers import DiffusionPipeline, LatentDiffusionMixin, LMSDiscreteScheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.utils import logging
@@ -41,7 +41,7 @@ def apply_model(self, *args, **kwargs):
return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample
-class StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py
index c84c222f2360..f5e519e0f23a 100644
--- a/examples/community/seed_resize_stable_diffusion.py
+++ b/examples/community/seed_resize_stable_diffusion.py
@@ -9,7 +9,7 @@
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -19,7 +19,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SeedResizeStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
+class SeedResizeStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py
index 7f0bf4bf0293..3633348a16df 100644
--- a/examples/community/speech_to_image_diffusion.py
+++ b/examples/community/speech_to_image_diffusion.py
@@ -18,7 +18,7 @@
PNDMScheduler,
UNet2DConditionModel,
)
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import logging
@@ -27,7 +27,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SpeechToImagePipeline(DiffusionPipeline, EfficiencyMixin):
+class SpeechToImagePipeline(DiffusionPipeline, LatentDiffusionMixin):
def __init__(
self,
speech_model: WhisperForConditionalGeneration,
diff --git a/examples/community/stable_diffusion_comparison.py b/examples/community/stable_diffusion_comparison.py
index 49c9ffa86a61..3723aa01f541 100644
--- a/examples/community/stable_diffusion_comparison.py
+++ b/examples/community/stable_diffusion_comparison.py
@@ -12,7 +12,7 @@
StableDiffusionPipeline,
UNet2DConditionModel,
)
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -23,7 +23,7 @@
pipe4_model_id = "CompVis/stable-diffusion-v1-4"
-class StableDiffusionComparisonPipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionComparisonPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for parallel comparison of Stable Diffusion v1-v4
This pipeline inherits from DiffusionPipeline and depends on the use of an Auth Token for
diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py
index 48048d5831f4..fba1f145baa6 100644
--- a/examples/community/stable_diffusion_ipex.py
+++ b/examples/community/stable_diffusion_ipex.py
@@ -23,7 +23,7 @@
from diffusers.configuration_utils import FrozenDict
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -60,7 +60,9 @@
"""
-class StableDiffusionIPEXPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionIPEXPipeline(
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-to-image generation using Stable Diffusion on IPEX.
diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py
index 470cbab5a527..3bdaa1d8dd58 100644
--- a/examples/community/stable_diffusion_mega.py
+++ b/examples/community/stable_diffusion_mega.py
@@ -16,7 +16,7 @@
UNet2DConditionModel,
)
from diffusers.configuration_utils import FrozenDict
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import deprecate, logging
@@ -24,7 +24,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionMegaPipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionMegaPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py
index 38400b578f97..5ee194ab80c8 100644
--- a/examples/community/stable_diffusion_repaint.py
+++ b/examples/community/stable_diffusion_repaint.py
@@ -24,7 +24,7 @@
from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.configuration_utils import FrozenDict, deprecate
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
@@ -139,7 +139,9 @@ def prepare_mask_and_masked_image(image, mask):
return mask, masked_image
-class StableDiffusionRepaintPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionRepaintPipeline(
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+):
r"""
Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py
index 80889d7897bd..4276de3f92d4 100644
--- a/examples/community/text_inpainting.py
+++ b/examples/community/text_inpainting.py
@@ -13,7 +13,7 @@
from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionInpaintPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -23,7 +23,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class TextInpainting(DiffusionPipeline, EfficiencyMixin):
+class TextInpainting(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text based inpainting using Stable Diffusion.
Uses CLIPSeg to get a mask from the given text, then calls the Inpainting pipeline with the generated mask
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
index 63461fadfc14..6937bf72b86b 100644
--- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
+++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
@@ -26,7 +26,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -44,7 +44,7 @@
class StableDiffusionControlNetXSPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance.
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
index bb2e6ad1dff7..50bc6803089e 100644
--- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
+++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
@@ -31,7 +31,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -53,7 +53,7 @@
class StableDiffusionXLControlNetXSPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
FromSingleFileMixin,
diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py
index dbc98d028fad..7a5d7cdb5b47 100644
--- a/examples/research_projects/rdm/pipeline_rdm.py
+++ b/examples/research_projects/rdm/pipeline_rdm.py
@@ -19,7 +19,7 @@
UNet2DConditionModel,
)
from diffusers.image_processor import VaeImageProcessor
-from diffusers.pipelines.pipeline_utils import EfficiencyMixin
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor
@@ -27,7 +27,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class RDMPipeline(DiffusionPipeline, EfficiencyMixin):
+class RDMPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Retrieval Augmented Diffusion.
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index 5e276845a85b..dbe3394b0438 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -121,9 +121,9 @@
"DDPMPipeline",
"DiffusionPipeline",
"DiTPipeline",
- "EfficiencyMixin",
"ImagePipelineOutput",
"KarrasVePipeline",
+ "LatentDiffusionMixin",
"LDMPipeline",
"LDMSuperResolutionPipeline",
"PNDMPipeline",
@@ -506,9 +506,9 @@
DDPMPipeline,
DiffusionPipeline,
DiTPipeline,
- EfficiencyMixin,
ImagePipelineOutput,
KarrasVePipeline,
+ LatentDiffusionMixin,
LDMPipeline,
LDMSuperResolutionPipeline,
PNDMPipeline,
diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
index 8ccade3aa228..af557aee6fb9 100644
--- a/src/diffusers/pipelines/__init__.py
+++ b/src/diffusers/pipelines/__init__.py
@@ -48,7 +48,7 @@
_import_structure["pipeline_utils"] = [
"AudioPipelineOutput",
"DiffusionPipeline",
- "EfficiencyMixin",
+ "LatentDiffusionMixin",
"ImagePipelineOutput",
]
_import_structure["deprecated"].extend(
@@ -329,8 +329,8 @@
from .pipeline_utils import (
AudioPipelineOutput,
DiffusionPipeline,
- EfficiencyMixin,
ImagePipelineOutput,
+ LatentDiffusionMixin,
)
try:
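These __init__.py edits touch both halves of the lazy-import pattern diffusers uses, which is why the symbol has to be renamed twice per file: once in the string table consumed at runtime and once in the mirrored imports seen by type checkers. A simplified sketch of that pattern (structure only, not the real file):

    import sys
    from typing import TYPE_CHECKING

    from diffusers.utils import _LazyModule

    _import_structure = {
        "pipeline_utils": ["DiffusionPipeline", "LatentDiffusionMixin"],
    }

    if TYPE_CHECKING:
        # Static analyzers see the real imports.
        from .pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
    else:
        # At runtime, attributes are resolved lazily on first access.
        sys.modules[__name__] = _LazyModule(
            __name__, globals()["__file__"], _import_structure
        )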
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
index 050475a1ad26..11769c30f3f8 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@@ -42,7 +42,7 @@
)
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import AnimateDiffPipelineOutput
@@ -87,7 +87,12 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
class AnimateDiffPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
+ DiffusionPipeline,
+ LatentDiffusionMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FreeInitMixin,
):
r"""
Pipeline for text-to-video generation.
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
index 599f0497da97..4ce69450ae9e 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -35,7 +35,7 @@
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import AnimateDiffPipelineOutput
@@ -165,7 +165,12 @@ def retrieve_timesteps(
class AnimateDiffVideoToVideoPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin
+ DiffusionPipeline,
+ LatentDiffusionMixin,
+ TextualInversionLoaderMixin,
+ IPAdapterMixin,
+ LoraLoaderMixin,
+ FreeInitMixin,
):
r"""
Pipeline for video-to-video generation.
diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
index 96cd554c112a..c1661839cc6d 100644
--- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
+++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
@@ -24,7 +24,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import logging, replace_example_docstring
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -49,7 +49,7 @@
"""
-class AudioLDMPipeline(DiffusionPipeline, EfficiencyMixin):
+class AudioLDMPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-audio generation using AudioLDM.
diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
index 64f93e3eefea..27118d30d3c8 100644
--- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
+++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -173,7 +173,7 @@ def __init__(
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
- # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.enable_vae_slicing
+ # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.enable_vae_slicing
def enable_vae_slicing(self):
r"""
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
@@ -181,7 +181,7 @@ def enable_vae_slicing(self):
"""
self.vae.enable_slicing()
- # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.disable_vae_slicing
+ # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.disable_vae_slicing
def disable_vae_slicing(self):
r"""
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
index 31b9107c7d2e..e65df8b78143 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -138,7 +138,7 @@ def retrieve_timesteps(
class StableDiffusionControlNetPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
index a3cc0cf3108a..94fc3f8b646a 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -131,7 +131,7 @@ def prepare_image(image):
class StableDiffusionControlNetImg2ImgPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
index fcce6f88a3e6..7d456793e293 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -242,7 +242,7 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image=False
class StableDiffusionControlNetInpaintPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
index 212fba1089ca..ee385d1d08ea 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -42,7 +42,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from .multicontrolnet import MultiControlNetModel
@@ -140,7 +140,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetInpaintPipeline(
- DiffusionPipeline, EfficiencyMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, LatentDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
index 02c5e3092696..24d534af9353 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -55,7 +55,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -116,7 +116,7 @@
class StableDiffusionXLControlNetPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
index 545285f4d5a9..e07326826c1e 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -54,7 +54,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -157,7 +157,11 @@ def retrieve_latents(
class StableDiffusionXLControlNetImg2ImgPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline,
+ LatentDiffusionMixin,
+ TextualInversionLoaderMixin,
+ StableDiffusionXLLoraLoaderMixin,
+ IPAdapterMixin,
):
r"""
Pipeline for image-to-image generation using Stable Diffusion XL with ControlNet guidance.
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
index 47c9cfdf19f7..c15f3fac4972 100644
--- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
+++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
from .pipeline_output import AltDiffusionPipelineOutput
@@ -120,7 +120,7 @@ def retrieve_timesteps(
class AltDiffusionPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
index 14f65f0034a2..9f1e9c3fdd6c 100644
--- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
+++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
from .pipeline_output import AltDiffusionPipelineOutput
@@ -160,7 +160,7 @@ def retrieve_timesteps(
class AltDiffusionImg2ImgPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
index 4d3415ca8139..f1b1c83ff279 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
@@ -26,7 +26,7 @@
from ....schedulers.scheduling_utils import SchedulerMixin
from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -37,7 +37,7 @@
class StableDiffusionModelEditingPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image model editing.
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
index 88b9498c9ab0..4c42bb0d2a7d 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
@@ -32,7 +32,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -63,7 +63,7 @@
class StableDiffusionParadigmsPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using a parallelized version of Stable Diffusion.
diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
index ac1f44a2f348..732b1bf70b47 100644
--- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
+++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
@@ -31,7 +31,7 @@
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -105,7 +105,7 @@ class I2VGenXLPipelineOutput(BaseOutput):
class I2VGenXLPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
):
r"""
Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/).
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
index 744ca3d19675..b620dce8129e 100644
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -130,7 +130,7 @@ def retrieve_timesteps(
class LatentConsistencyModelImg2ImgPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
index 395e4575942d..62ca96079cae 100644
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -108,7 +108,7 @@ def retrieve_timesteps(
class LatentConsistencyModelPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
index 7c9617a3e572..8fd728994a44 100644
--- a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
+++ b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
@@ -36,7 +36,7 @@
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin
if is_librosa_available():
@@ -64,7 +64,7 @@
"""
-class MusicLDMPipeline(DiffusionPipeline, EfficiencyMixin):
+class MusicLDMPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-audio generation using MusicLDM.
diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
index 85c533acaddf..b32e64d2bdc5 100644
--- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
+++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
@@ -25,7 +25,7 @@
from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .image_encoder import PaintByExampleImageEncoder
@@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask):
return mask, masked_image
-class PaintByExamplePipeline(DiffusionPipeline, EfficiencyMixin):
+class PaintByExamplePipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py
index 90468b0a2127..b60dd62140af 100644
--- a/src/diffusers/pipelines/pia/pipeline_pia.py
+++ b/src/diffusers/pipelines/pia/pipeline_pia.py
@@ -46,7 +46,7 @@
)
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -212,7 +212,7 @@ class PIAPipelineOutput(BaseOutput):
class PIAPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index 36fe8d779d27..3a4b22064be2 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -2098,9 +2098,9 @@ def set_attention_slice(self, slice_size: Optional[int]):
module.set_attention_slice(slice_size)
-class EfficiencyMixin:
+class LatentDiffusionMixin:
r"""
- Helper for DiffusionPipeline with vae and unet.(mainly for stable diffusion)
+ Helper for DiffusionPipeline with a VAE and a UNet (mainly for latent diffusion models such as Stable Diffusion).
"""
def enable_vae_slicing(self):
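
Reviewer note, a minimal usage sketch of the renamed mixin (not part of the patch; the checkpoint id is illustrative): any pipeline that inherits LatentDiffusionMixin exposes the VAE memory helpers directly on the pipeline object.

    # Illustrative only -- assumes this patch series is applied.
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe.enable_vae_slicing()  # inherited from LatentDiffusionMixin
    pipe.enable_vae_tiling()   # likewise inherited; see pipeline_utils.py above
    image = pipe("an astronaut riding a horse").images[0]
    pipe.disable_vae_slicing()
    pipe.disable_vae_tiling()
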
diff --git a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
index 6513adf5f67d..9e07a1fa1c8e 100644
--- a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
+++ b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
@@ -11,14 +11,14 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import SemanticStableDiffusionPipelineOutput
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SemanticStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin):
+class SemanticStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with latent editing.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 860dfeeb85af..c7abaeebe177 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -116,7 +116,7 @@ def retrieve_timesteps(
class StableDiffusionPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
index 687dab69455d..1e91063a1228 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
@@ -26,7 +26,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -34,7 +34,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionImageVariationPipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionImageVariationPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline to generate image variations from an input image using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
index 6df7d62d7c9b..904e3c8c2c2a 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -156,7 +156,7 @@ def retrieve_timesteps(
class StableDiffusionImg2ImgPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index 6652aecdcb5f..1ea5fa6a0670 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -29,7 +29,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -220,7 +220,7 @@ def retrieve_timesteps(
class StableDiffusionInpaintPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
index 6a522cafd19f..2d8ed42935a3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
@@ -26,7 +26,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import PIL_INTERPOLATION, deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -73,7 +73,7 @@ def retrieve_latents(
class StableDiffusionInstructPix2PixPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
):
r"""
Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion).
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
index 2712b17901a3..6ad27084f8a8 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
@@ -27,7 +27,7 @@
from ...schedulers import EulerDiscreteScheduler
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput
+from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -60,7 +60,7 @@ def preprocess(image):
return image
-class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin):
+class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin):
r"""
Pipeline for upscaling Stable Diffusion output image resolution by a factor of 2.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index be26d67322bc..6b0be8b5a8c1 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -34,7 +34,7 @@
from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import StableDiffusionPipelineOutput
@@ -68,7 +68,7 @@ def preprocess(image):
class StableDiffusionUpscalePipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-guided image super-resolution using Stable Diffusion 2.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
index 23d9e382ac01..821dbd2b6318 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput
+from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin
from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -58,7 +58,7 @@
"""
-class StableUnCLIPPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableUnCLIPPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
"""
Pipeline for text-to-image generation using stable unCLIP.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
index 0a3a3c56c6f4..fd1403c8f8f6 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput
+from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin
from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -69,7 +69,9 @@
"""
-class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableUnCLIPImg2ImgPipeline(
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+):
"""
Pipeline for text-guided image-to-image generation using stable unCLIP.
diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
index b267d88e67e0..84796d936bdc 100644
--- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
+++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -170,7 +170,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
return hidden_states
-class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin):
+class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite.
diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
index ee6b5a0ac739..ad136b2b3993 100644
--- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
+++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
@@ -39,7 +39,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -236,7 +236,7 @@ def preprocess_mask(mask, batch_size: int = 1):
class StableDiffusionDiffEditPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
index 76c2c23a3f2b..99b5fc35cf1a 100644
--- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
+++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -99,7 +99,7 @@
"""
-class StableDiffusionGLIGENPipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionGLIGENPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
index 404f681c3a32..77ccbdf55ca8 100644
--- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
+++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
@@ -34,7 +34,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.clip_image_project_model import CLIPImageProjection
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -145,7 +145,7 @@
"""
-class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
index ea1beb6788c3..5a3e570cbdaf 100755
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
@@ -26,7 +26,7 @@
from ...schedulers import LMSDiscreteScheduler
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
@@ -48,7 +48,7 @@ def apply_model(self, *args, **kwargs):
class StableDiffusionKDiffusionPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
index 2742b8797c8f..6925e4eeb1a4 100644
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
@@ -50,7 +50,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -91,7 +91,7 @@ def apply_model(self, *args, **kwargs):
class StableDiffusionXLKDiffusionPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
index 8f517e3d035c..a456ea747411 100644
--- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
+++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -83,7 +83,7 @@ class LDM3DPipelineOutput(BaseOutput):
class StableDiffusionLDM3DPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
index 29cde4ef328b..22f4ae29c991 100644
--- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
+++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
@@ -32,7 +32,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -60,7 +60,7 @@
class StableDiffusionPanoramaPipeline(
- DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
):
r"""
Pipeline for text-to-image generation using MultiDiffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
index 936cd5964666..edd3df1506ca 100644
--- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
+++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
@@ -14,7 +14,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import StableDiffusionSafePipelineOutput
from .safety_checker import SafeStableDiffusionSafetyChecker
@@ -22,7 +22,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionPipelineSafe(DiffusionPipeline, EfficiencyMixin, IPAdapterMixin):
+class StableDiffusionPipelineSafe(DiffusionPipeline, LatentDiffusionMixin, IPAdapterMixin):
r"""
Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
index 95a4215e6710..8c44849840f9 100644
--- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
+++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
@@ -33,7 +33,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -98,7 +98,7 @@ def __call__(
# Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input
-class StableDiffusionSAGPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin):
+class StableDiffusionSAGPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
index 508c09a42fbb..e7d1d28072f6 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
@@ -52,7 +52,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -148,7 +148,7 @@ def retrieve_timesteps(
class StableDiffusionXLPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
index 8c71019b7647..a1034caf4398 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
@@ -52,7 +52,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -165,7 +165,7 @@ def retrieve_timesteps(
class StableDiffusionXLImg2ImgPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
index f45ebd273f52..43b397fb18a7 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@@ -53,7 +53,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -310,7 +310,7 @@ def retrieve_timesteps(
class StableDiffusionXLInpaintPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
FromSingleFileMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
index 280e75035f1e..de11d8d8749f 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
@@ -41,7 +41,7 @@
scale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -119,7 +119,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLInstructPix2PixPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
index 271082df4c4e..a6575886594b 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -163,7 +163,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
-class StableDiffusionAdapterPipeline(DiffusionPipeline, EfficiencyMixin):
+class StableDiffusionAdapterPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
https://arxiv.org/abs/2302.08453
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index 1b15f1ec3107..16dd4180c2c2 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -51,7 +51,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -181,7 +181,7 @@ def retrieve_timesteps(
class StableDiffusionXLAdapterPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
index db0c8e54fb6f..005984f8605a 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@@ -33,7 +33,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import TextToVideoSDPipelineOutput
@@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
return outputs
-class TextToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class TextToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-to-video generation.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
index dcc7c6a7df0f..8ac6507ee717 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from . import TextToVideoSDPipelineOutput
@@ -157,7 +157,7 @@ def preprocess_video(video):
return video
-class VideoToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class VideoToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-guided video-to-video generation.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
index 991dfeee0a55..76da107e8967 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
@@ -17,7 +17,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from ..stable_diffusion import StableDiffusionSafetyChecker
@@ -281,7 +281,7 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
return warped_latents
-class TextToVideoZeroPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class TextToVideoZeroPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for zero-shot text-to-video generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
index afd81d04f3fe..c659202838b2 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
if is_invisible_watermark_available():
@@ -327,7 +327,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class TextToVideoZeroSDXLPipeline(
DiffusionPipeline,
- EfficiencyMixin,
+ LatentDiffusionMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
):
diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
index cad7cb381e64..2c09bb0fad5f 100644
--- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
+++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
@@ -21,7 +21,7 @@
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.outputs import BaseOutput
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin
+from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
from .modeling_text_decoder import UniDiffuserTextDecoder
from .modeling_uvit import UniDiffuserModel
@@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput):
text: Optional[Union[List[str], List[List[str]]]]
-class UniDiffuserPipeline(DiffusionPipeline, EfficiencyMixin):
+class UniDiffuserPipeline(DiffusionPipeline, LatentDiffusionMixin):
r"""
Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
image generation, image-conditioned text generation, and joint image-text generation.
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py
index 19f5ac445a9e..a87b27d2c479 100644
--- a/src/diffusers/utils/dummy_pt_objects.py
+++ b/src/diffusers/utils/dummy_pt_objects.py
@@ -570,7 +570,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class EfficiencyMixin(metaclass=DummyObject):
+class LatentDiffusionMixin(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
index a11ca7b4a233..89320eb2b936 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
@@ -53,7 +53,7 @@
TEXT_TO_IMAGE_PARAMS,
)
from ..test_pipelines_common import (
- PipelineEfficiencyFunctionTesterMixin,
+ LDMFunctionTesterMixin,
PipelineKarrasSchedulerTesterMixin,
PipelineLatentTesterMixin,
PipelineTesterMixin,
@@ -64,7 +64,7 @@
class StableDiffusion2PipelineFastTests(
- PipelineEfficiencyFunctionTesterMixin,
+ LDMFunctionTesterMixin,
PipelineLatentTesterMixin,
PipelineKarrasSchedulerTesterMixin,
PipelineTesterMixin,
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
index b9e01f598cde..b9327b9d3ce1 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
@@ -50,7 +50,7 @@
TEXT_TO_IMAGE_PARAMS,
)
from ..test_pipelines_common import (
- PipelineEfficiencyFunctionTesterMixin,
+ LDMFunctionTesterMixin,
PipelineLatentTesterMixin,
PipelineTesterMixin,
SDXLOptionalComponentsTesterMixin,
@@ -61,7 +61,7 @@
class StableDiffusionXLPipelineFastTests(
- PipelineEfficiencyFunctionTesterMixin,
+ LDMFunctionTesterMixin,
PipelineLatentTesterMixin,
PipelineTesterMixin,
SDXLOptionalComponentsTesterMixin,
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 58929296e34d..173fdb54c2ba 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -59,10 +59,10 @@ def check_same_shape(tensor_list):
return all(shape == shapes[0] for shape in shapes[1:])
-class PipelineEfficiencyFunctionTesterMixin:
+class LDMFunctionTesterMixin:
"""
This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
- It provides a set of common tests for PyTorch pipeline that inherit from EfficiencyMixin, e.g. vae_slicing, vae_tiling, freeu, etc.
+ It provides a set of common tests for PyTorch pipelines that inherit from LatentDiffusionMixin, e.g. vae_slicing, vae_tiling, freeu, etc.
"""
def test_vae_slicing(self):
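
Reviewer note, a sketch (not part of the patch) of how the renamed tester mixin composes with the other test mixins, following the fast-test classes updated above. `get_dummy_components` is assumed to build the minimal component dict that `pipeline_class(**components)` expects, and the mixins are assumed importable from tests/pipelines/test_pipelines_common.py:

    # Hypothetical fast-test class showing the mixin composition pattern.
    import unittest

    from diffusers import StableDiffusionPipeline
    from test_pipelines_common import LDMFunctionTesterMixin, PipelineTesterMixin

    class ExamplePipelineFastTests(LDMFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase):
        pipeline_class = StableDiffusionPipeline  # pipeline under test

        def get_dummy_components(self):
            # build tiny vae/unet/scheduler/text-encoder components for fast tests
            raise NotImplementedError
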
From 6c11d6a852452777f43bacf39bd829c2ad299732 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Wed, 21 Feb 2024 10:58:22 +0800
Subject: [PATCH 07/17] add LDM_component test for pipelines with
LatentDiffusionMixin
---
tests/pipelines/test_pipelines_common.py | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 173fdb54c2ba..f14b4e98eae7 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -29,6 +29,7 @@
UNet2DConditionModel,
)
from diffusers.image_processor import VaeImageProcessor
+from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import logging
from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
@@ -1152,6 +1153,14 @@ def callback_increase_guidance(pipe, i, t, callback_kwargs):
# accounts for models that modify the number of inference steps based on strength
assert pipe.guidance_scale == (inputs["guidance_scale"] + pipe.num_timesteps)
+ def test_LDM_component(self):
+ """Any pipeline that have LDMFuncMixin should have vae and unet components."""
+ if not issubclass(self.pipeline_class, LatentDiffusionMixin):
+ return
+ components = self.get_dummy_components()
+ pipe = self.pipeline_class(**components)
+ self.assertTrue(hasattr(pipe, "vae"))
+ self.assertTrue(hasattr(pipe, "unet"))
@is_staging_test
class PipelinePushToHubTester(unittest.TestCase):
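
The issubclass guard above means pipelines that do not inherit the mixin pass trivially. A quick sketch of what it evaluates to (pipeline choices are illustrative; at this point in the series StableDiffusionPipeline already inherits LatentDiffusionMixin, while DiTPipeline does not):

    from diffusers import DiTPipeline, StableDiffusionPipeline
    from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin

    issubclass(StableDiffusionPipeline, LatentDiffusionMixin)  # True -> vae/unet asserted
    issubclass(DiTPipeline, LatentDiffusionMixin)              # False -> test returns early
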
From 4602bac99b7c40f8871457616a0ed012282b44a8 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Thu, 22 Feb 2024 15:42:00 +0800
Subject: [PATCH 08/17] rename EfficiencyMixin to StableDiffusionMixin
---
.../community/composable_stable_diffusion.py | 4 ++--
examples/community/gluegen.py | 4 ++--
examples/community/ip_adapter_face_id.py | 4 ++--
.../community/latent_consistency_interpolate.py | 4 ++--
examples/community/llm_grounded_diffusion.py | 4 ++--
examples/community/lpw_stable_diffusion.py | 4 ++--
examples/community/lpw_stable_diffusion_xl.py | 4 ++--
.../community/pipeline_animatediff_controlnet.py | 4 ++--
.../community/pipeline_animatediff_img2video.py | 4 ++--
examples/community/pipeline_demofusion_sdxl.py | 4 ++--
.../community/pipeline_sdxl_style_aligned.py | 6 +++---
...ine_stable_diffusion_xl_controlnet_adapter.py | 4 ++--
...le_diffusion_xl_controlnet_adapter_inpaint.py | 4 ++--
examples/community/pipeline_zero1to3.py | 6 +++---
examples/community/sd_text2img_k_diffusion.py | 4 ++--
.../community/seed_resize_stable_diffusion.py | 4 ++--
examples/community/speech_to_image_diffusion.py | 4 ++--
.../community/stable_diffusion_comparison.py | 4 ++--
examples/community/stable_diffusion_ipex.py | 4 ++--
examples/community/stable_diffusion_mega.py | 4 ++--
examples/community/stable_diffusion_repaint.py | 4 ++--
examples/community/text_inpainting.py | 4 ++--
.../controlnetxs/pipeline_controlnet_xs.py | 4 ++--
.../controlnetxs/pipeline_controlnet_xs_sd_xl.py | 4 ++--
examples/research_projects/rdm/pipeline_rdm.py | 4 ++--
src/diffusers/__init__.py | 4 ++--
src/diffusers/pipelines/__init__.py | 4 ++--
.../animatediff/pipeline_animatediff.py | 4 ++--
.../pipeline_animatediff_video2video.py | 4 ++--
.../pipelines/audioldm/pipeline_audioldm.py | 4 ++--
.../pipelines/audioldm2/pipeline_audioldm2.py | 4 ++--
.../pipelines/controlnet/pipeline_controlnet.py | 4 ++--
.../controlnet/pipeline_controlnet_img2img.py | 4 ++--
.../controlnet/pipeline_controlnet_inpaint.py | 4 ++--
.../pipeline_controlnet_inpaint_sd_xl.py | 4 ++--
.../controlnet/pipeline_controlnet_sd_xl.py | 4 ++--
.../pipeline_controlnet_sd_xl_img2img.py | 4 ++--
.../alt_diffusion/pipeline_alt_diffusion.py | 4 ++--
.../pipeline_alt_diffusion_img2img.py | 4 ++--
.../pipeline_stable_diffusion_model_editing.py | 4 ++--
.../pipeline_stable_diffusion_paradigms.py | 4 ++--
.../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 4 ++--
.../pipeline_latent_consistency_img2img.py | 4 ++--
.../pipeline_latent_consistency_text2img.py | 4 ++--
.../pipelines/musicldm/pipeline_musicldm.py | 4 ++--
.../pipeline_paint_by_example.py | 4 ++--
src/diffusers/pipelines/pia/pipeline_pia.py | 4 ++--
src/diffusers/pipelines/pipeline_utils.py | 2 +-
.../pipeline_semantic_stable_diffusion.py | 4 ++--
.../pipeline_stable_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_image_variation.py | 4 ++--
.../pipeline_stable_diffusion_img2img.py | 4 ++--
.../pipeline_stable_diffusion_inpaint.py | 4 ++--
...pipeline_stable_diffusion_instruct_pix2pix.py | 4 ++--
.../pipeline_stable_diffusion_latent_upscale.py | 4 ++--
.../pipeline_stable_diffusion_upscale.py | 4 ++--
.../stable_diffusion/pipeline_stable_unclip.py | 4 ++--
.../pipeline_stable_unclip_img2img.py | 4 ++--
...ipeline_stable_diffusion_attend_and_excite.py | 4 ++--
.../pipeline_stable_diffusion_diffedit.py | 4 ++--
.../pipeline_stable_diffusion_gligen.py | 4 ++--
...ipeline_stable_diffusion_gligen_text_image.py | 4 ++--
.../pipeline_stable_diffusion_k_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_xl_k_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_ldm3d.py | 4 ++--
.../pipeline_stable_diffusion_panorama.py | 4 ++--
.../pipeline_stable_diffusion_safe.py | 4 ++--
.../pipeline_stable_diffusion_sag.py | 4 ++--
.../pipeline_stable_diffusion_xl.py | 4 ++--
.../pipeline_stable_diffusion_xl_img2img.py | 4 ++--
.../pipeline_stable_diffusion_xl_inpaint.py | 4 ++--
...eline_stable_diffusion_xl_instruct_pix2pix.py | 4 ++--
.../pipeline_stable_diffusion_adapter.py | 4 ++--
.../pipeline_stable_diffusion_xl_adapter.py | 4 ++--
.../pipeline_text_to_video_synth.py | 4 ++--
.../pipeline_text_to_video_synth_img2img.py | 4 ++--
.../pipeline_text_to_video_zero.py | 4 ++--
.../pipeline_text_to_video_zero_sdxl.py | 4 ++--
.../unidiffuser/pipeline_unidiffuser.py | 4 ++--
src/diffusers/utils/dummy_pt_objects.py | 16 ++++++++--------
.../stable_diffusion_2/test_stable_diffusion.py | 4 ++--
.../test_stable_diffusion_xl.py | 4 ++--
tests/pipelines/test_pipelines_common.py | 12 +++++++-----
83 files changed, 178 insertions(+), 176 deletions(-)
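
After this rename, user code reaches the helpers under the new name; nothing else about the call sites changes. A minimal usage sketch (model id and FreeU factors are illustrative, not prescribed by this patch):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    # Inherited from StableDiffusionMixin (formerly LatentDiffusionMixin)
    pipe.enable_vae_slicing()                          # decode latents slice by slice
    pipe.enable_vae_tiling()                           # tile the VAE for large images
    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)  # commonly cited SD1.5 factors

    image = pipe("an astronaut riding a horse").images[0]
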
diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py
index c7b91f94f294..3153bd30e479 100644
--- a/examples/community/composable_stable_diffusion.py
+++ b/examples/community/composable_stable_diffusion.py
@@ -22,7 +22,7 @@
from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import (
@@ -39,7 +39,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class ComposableStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/gluegen.py b/examples/community/gluegen.py
index f0ace91d683c..b8f147000229 100644
--- a/examples/community/gluegen.py
+++ b/examples/community/gluegen.py
@@ -10,7 +10,7 @@
from diffusers.loaders import LoraLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -194,7 +194,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
-class GlueGenStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin, LoraLoaderMixin):
+class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, LoraLoaderMixin):
def __init__(
self,
vae: AutoencoderKL,
diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py
index d1fa98bc9df9..b4d2446b5ce9 100644
--- a/examples/community/ip_adapter_face_id.py
+++ b/examples/community/ip_adapter_face_id.py
@@ -27,7 +27,7 @@
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -415,7 +415,7 @@ def retrieve_timesteps(
class IPAdapterFaceIDStableDiffusionPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/examples/community/latent_consistency_interpolate.py b/examples/community/latent_consistency_interpolate.py
index 44a7c8bec07a..0c14a55bd30f 100644
--- a/examples/community/latent_consistency_interpolate.py
+++ b/examples/community/latent_consistency_interpolate.py
@@ -9,7 +9,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import LCMScheduler
from diffusers.utils import (
@@ -190,7 +190,7 @@ def slerp(
class LatentConsistencyModelWalkPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using a latent consistency model.
diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py
index 7f7b0fad39f8..5db144a9a23a 100644
--- a/examples/community/llm_grounded_diffusion.py
+++ b/examples/community/llm_grounded_diffusion.py
@@ -35,7 +35,7 @@
from diffusers.models.attention_processor import AttnProcessor2_0
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines import DiffusionPipeline
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -269,7 +269,7 @@ def __call__(
class LLMGroundedDiffusionPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py
index 819787ee9a02..78d93bfb7081 100644
--- a/examples/community/lpw_stable_diffusion.py
+++ b/examples/community/lpw_stable_diffusion.py
@@ -13,7 +13,7 @@
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -409,7 +409,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8):
class StableDiffusionLongPromptWeightingPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion without tokens length limit, and support parsing
diff --git a/examples/community/lpw_stable_diffusion_xl.py b/examples/community/lpw_stable_diffusion_xl.py
index c5c93d9ea381..b56adeeea87a 100644
--- a/examples/community/lpw_stable_diffusion_xl.py
+++ b/examples/community/lpw_stable_diffusion_xl.py
@@ -30,7 +30,7 @@
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
)
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -546,7 +546,7 @@ def retrieve_timesteps(
class SDXLLongPromptWeightingPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
FromSingleFileMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py
index a1d6c5605102..2d00b40cfcc5 100644
--- a/examples/community/pipeline_animatediff_controlnet.py
+++ b/examples/community/pipeline_animatediff_controlnet.py
@@ -28,7 +28,7 @@
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unets.unet_motion_model import MotionAdapter
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
@@ -112,7 +112,7 @@ class AnimateDiffControlNetPipelineOutput(BaseOutput):
class AnimateDiffControlNetPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-video generation.
diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py
index d691ece93050..35f7909bf15f 100644
--- a/examples/community/pipeline_animatediff_img2video.py
+++ b/examples/community/pipeline_animatediff_img2video.py
@@ -26,7 +26,7 @@
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unet_motion_model import MotionAdapter
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
@@ -231,7 +231,7 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput):
class AnimateDiffImgToVideoPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-video generation.
diff --git a/examples/community/pipeline_demofusion_sdxl.py b/examples/community/pipeline_demofusion_sdxl.py
index e7dc269a1e71..e29678b55922 100644
--- a/examples/community/pipeline_demofusion_sdxl.py
+++ b/examples/community/pipeline_demofusion_sdxl.py
@@ -23,7 +23,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
is_accelerate_available,
@@ -94,7 +94,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class DemoFusionSDXLPipeline(
- DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py
index b547c35f1123..cc7804fe4237 100644
--- a/examples/community/pipeline_sdxl_style_aligned.py
+++ b/examples/community/pipeline_sdxl_style_aligned.py
@@ -51,7 +51,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -85,7 +85,7 @@
>>> from typing import List
>>> import torch
- >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,LatentDiffusionMixin
+ >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
>>> from PIL import Image
>>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo"
@@ -389,7 +389,7 @@ def retrieve_latents(
class StyleAlignedSDXLPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
index 49a46e9ba4be..fe94646a4436 100644
--- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
+++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py
@@ -33,7 +33,7 @@
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -159,7 +159,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetAdapterPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
index 5347ab949697..2eaa0a5e0d37 100644
--- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
+++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py
@@ -52,7 +52,7 @@
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -305,7 +305,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetAdapterInpaintPipeline(
- DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py
index 1656ae674382..133aa694c18c 100644
--- a/examples/community/pipeline_zero1to3.py
+++ b/examples/community/pipeline_zero1to3.py
@@ -22,10 +22,10 @@
# randn_tensor,
# replace_example_docstring,
# )
-# from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+# from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
# from . import StableDiffusionPipelineOutput
# from .safety_checker import StableDiffusionSafetyChecker
-from diffusers import AutoencoderKL, DiffusionPipeline, LatentDiffusionMixin, UNet2DConditionModel
+from diffusers import AutoencoderKL, DiffusionPipeline, StableDiffusionMixin, UNet2DConditionModel
from diffusers.configuration_utils import ConfigMixin, FrozenDict
from diffusers.models.modeling_utils import ModelMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -66,7 +66,7 @@ def forward(self, x):
return self.projection(x)
-class Zero1to3StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for single view conditioned novel view generation using Zero1to3.
diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py
index 3ca36872830e..3299a7605257 100755
--- a/examples/community/sd_text2img_k_diffusion.py
+++ b/examples/community/sd_text2img_k_diffusion.py
@@ -19,7 +19,7 @@
import torch
from k_diffusion.external import CompVisDenoiser, CompVisVDenoiser
-from diffusers import DiffusionPipeline, LatentDiffusionMixin, LMSDiscreteScheduler
+from diffusers import DiffusionPipeline, LMSDiscreteScheduler, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.utils import logging
@@ -41,7 +41,7 @@ def apply_model(self, *args, **kwargs):
return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample
-class StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py
index f5e519e0f23a..20f972f049b3 100644
--- a/examples/community/seed_resize_stable_diffusion.py
+++ b/examples/community/seed_resize_stable_diffusion.py
@@ -9,7 +9,7 @@
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -19,7 +19,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SeedResizeStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class SeedResizeStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py
index 3633348a16df..3537ef89e1a1 100644
--- a/examples/community/speech_to_image_diffusion.py
+++ b/examples/community/speech_to_image_diffusion.py
@@ -18,7 +18,7 @@
PNDMScheduler,
UNet2DConditionModel,
)
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import logging
@@ -27,7 +27,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SpeechToImagePipeline(DiffusionPipeline, LatentDiffusionMixin):
+class SpeechToImagePipeline(DiffusionPipeline, StableDiffusionMixin):
def __init__(
self,
speech_model: WhisperForConditionalGeneration,
diff --git a/examples/community/stable_diffusion_comparison.py b/examples/community/stable_diffusion_comparison.py
index 3723aa01f541..dab5705b3370 100644
--- a/examples/community/stable_diffusion_comparison.py
+++ b/examples/community/stable_diffusion_comparison.py
@@ -12,7 +12,7 @@
StableDiffusionPipeline,
UNet2DConditionModel,
)
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -23,7 +23,7 @@
pipe4_model_id = "CompVis/stable-diffusion-v1-4"
-class StableDiffusionComparisonPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for parallel comparison of Stable Diffusion v1-v4
This pipeline inherits from DiffusionPipeline and depends on the use of an Auth Token for
diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py
index fba1f145baa6..8e71f79e9ae4 100644
--- a/examples/community/stable_diffusion_ipex.py
+++ b/examples/community/stable_diffusion_ipex.py
@@ -23,7 +23,7 @@
from diffusers.configuration_utils import FrozenDict
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -61,7 +61,7 @@
class StableDiffusionIPEXPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion on IPEX.
diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py
index 3bdaa1d8dd58..e53afb703e24 100644
--- a/examples/community/stable_diffusion_mega.py
+++ b/examples/community/stable_diffusion_mega.py
@@ -16,7 +16,7 @@
UNet2DConditionModel,
)
from diffusers.configuration_utils import FrozenDict
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import deprecate, logging
@@ -24,7 +24,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionMegaPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py
index 5ee194ab80c8..02bef293bba8 100644
--- a/examples/community/stable_diffusion_repaint.py
+++ b/examples/community/stable_diffusion_repaint.py
@@ -24,7 +24,7 @@
from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.configuration_utils import FrozenDict, deprecate
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
@@ -140,7 +140,7 @@ def prepare_mask_and_masked_image(image, mask):
class StableDiffusionRepaintPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py
index 4276de3f92d4..ea4da966bb71 100644
--- a/examples/community/text_inpainting.py
+++ b/examples/community/text_inpainting.py
@@ -13,7 +13,7 @@
from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionInpaintPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -23,7 +23,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class TextInpainting(DiffusionPipeline, LatentDiffusionMixin):
+class TextInpainting(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text based inpainting using Stable Diffusion.
Uses CLIPSeg to get a mask from the given text, then calls the Inpainting pipeline with the generated mask
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
index 6937bf72b86b..88a586e9271d 100644
--- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
+++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py
@@ -26,7 +26,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -44,7 +44,7 @@
class StableDiffusionControlNetXSPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance.
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
index 50bc6803089e..d0186573fa9c 100644
--- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
+++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py
@@ -31,7 +31,7 @@
XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -53,7 +53,7 @@
class StableDiffusionXLControlNetXSPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
FromSingleFileMixin,
diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py
index 7a5d7cdb5b47..dd97bf71b9db 100644
--- a/examples/research_projects/rdm/pipeline_rdm.py
+++ b/examples/research_projects/rdm/pipeline_rdm.py
@@ -19,7 +19,7 @@
UNet2DConditionModel,
)
from diffusers.image_processor import VaeImageProcessor
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor
@@ -27,7 +27,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class RDMPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class RDMPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Retrieval Augmented Diffusion.
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index dbe3394b0438..cf07c841b448 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -123,12 +123,12 @@
"DiTPipeline",
"ImagePipelineOutput",
"KarrasVePipeline",
- "LatentDiffusionMixin",
"LDMPipeline",
"LDMSuperResolutionPipeline",
"PNDMPipeline",
"RePaintPipeline",
"ScoreSdeVePipeline",
+ "StableDiffusionMixin",
]
)
_import_structure["schedulers"].extend(
@@ -508,12 +508,12 @@
DiTPipeline,
ImagePipelineOutput,
KarrasVePipeline,
- LatentDiffusionMixin,
LDMPipeline,
LDMSuperResolutionPipeline,
PNDMPipeline,
RePaintPipeline,
ScoreSdeVePipeline,
+ StableDiffusionMixin,
)
from .schedulers import (
AmusedScheduler,
diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
index af557aee6fb9..a1840201f8ba 100644
--- a/src/diffusers/pipelines/__init__.py
+++ b/src/diffusers/pipelines/__init__.py
@@ -48,7 +48,7 @@
_import_structure["pipeline_utils"] = [
"AudioPipelineOutput",
"DiffusionPipeline",
- "LatentDiffusionMixin",
+ "StableDiffusionMixin",
"ImagePipelineOutput",
]
_import_structure["deprecated"].extend(
@@ -330,7 +330,7 @@
AudioPipelineOutput,
DiffusionPipeline,
ImagePipelineOutput,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
)
try:
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
index 11769c30f3f8..10e7175c2713 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@@ -42,7 +42,7 @@
)
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import AnimateDiffPipelineOutput
@@ -88,7 +88,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
class AnimateDiffPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
index 4ce69450ae9e..bfa1785081a6 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -35,7 +35,7 @@
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import AnimateDiffPipelineOutput
@@ -166,7 +166,7 @@ def retrieve_timesteps(
class AnimateDiffVideoToVideoPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
index c1661839cc6d..69bebdd0dc4f 100644
--- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
+++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
@@ -24,7 +24,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import logging, replace_example_docstring
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -49,7 +49,7 @@
"""
-class AudioLDMPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-audio generation using AudioLDM.
diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
index 27118d30d3c8..e01aa9929dd8 100644
--- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
+++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -173,7 +173,7 @@ def __init__(
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
- # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.enable_vae_slicing
+ # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_slicing
def enable_vae_slicing(self):
r"""
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
@@ -181,7 +181,7 @@ def enable_vae_slicing(self):
"""
self.vae.enable_slicing()
- # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.disable_vae_slicing
+ # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing
def disable_vae_slicing(self):
r"""
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
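
AudioLDM2 re-declares these helpers instead of inheriting the mixin, so only its "# Copied from" markers change. Those comments are what the repository's copy-consistency checker keys off (run via make fix-copies, to the best of my knowledge); the convention, sketched with the body elided:

    # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_slicing
    def enable_vae_slicing(self):
        ...  # body and docstring must match the referenced method exactly
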
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
index e65df8b78143..9f968daaa03c 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -138,7 +138,7 @@ def retrieve_timesteps(
class StableDiffusionControlNetPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
index 94fc3f8b646a..304767107332 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -131,7 +131,7 @@ def prepare_image(image):
class StableDiffusionControlNetImg2ImgPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
index 7d456793e293..96c4245ba9e6 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .multicontrolnet import MultiControlNetModel
@@ -242,7 +242,7 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image=False
class StableDiffusionControlNetInpaintPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
index 4da8542392c6..bde3647c0c48 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -53,7 +53,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
from .multicontrolnet import MultiControlNetModel
@@ -151,7 +151,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetInpaintPipeline(
- DiffusionPipeline, LatentDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, IPAdapterMixin
+ DiffusionPipeline, StableDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, IPAdapterMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion XL.
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
index 24d534af9353..4e0a880a4a11 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -55,7 +55,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -116,7 +116,7 @@
class StableDiffusionXLControlNetPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
index e07326826c1e..41a8c4fa005e 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -54,7 +54,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import is_compiled_module, randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -158,7 +158,7 @@ def retrieve_latents(
class StableDiffusionXLControlNetImg2ImgPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
index c15f3fac4972..e4583699e79e 100644
--- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
+++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
from .pipeline_output import AltDiffusionPipelineOutput
@@ -120,7 +120,7 @@ def retrieve_timesteps(
class AltDiffusionPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
index 9f1e9c3fdd6c..156e52c249d9 100644
--- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
+++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
from .pipeline_output import AltDiffusionPipelineOutput
@@ -160,7 +160,7 @@ def retrieve_timesteps(
class AltDiffusionImg2ImgPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
index f1b1c83ff279..dee93fc2eb53 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
@@ -26,7 +26,7 @@
from ....schedulers.scheduling_utils import SchedulerMixin
from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -37,7 +37,7 @@
class StableDiffusionModelEditingPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image model editing.
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
index 4c42bb0d2a7d..ddc866ef9b86 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
@@ -32,7 +32,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -63,7 +63,7 @@
class StableDiffusionParadigmsPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-to-image generation using a parallelized version of Stable Diffusion.
diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
index 732b1bf70b47..2df21533962c 100644
--- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
+++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py
@@ -31,7 +31,7 @@
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -105,7 +105,7 @@ class I2VGenXLPipelineOutput(BaseOutput):
class I2VGenXLPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
):
r"""
Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/).
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
index b620dce8129e..d2580b83c74a 100644
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -130,7 +130,7 @@ def retrieve_timesteps(
class LatentConsistencyModelImg2ImgPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
index 62ca96079cae..a383f346aacd 100644
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
@@ -108,7 +108,7 @@ def retrieve_timesteps(
class LatentConsistencyModelPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
index 8fd728994a44..5fde3450b9a0 100644
--- a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
+++ b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py
@@ -36,7 +36,7 @@
replace_example_docstring,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
if is_librosa_available():
@@ -64,7 +64,7 @@
"""
-class MusicLDMPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-audio generation using MusicLDM.
diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
index b32e64d2bdc5..8a24f134e793 100644
--- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
+++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
@@ -25,7 +25,7 @@
from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from .image_encoder import PaintByExampleImageEncoder
@@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask):
return mask, masked_image
-class PaintByExamplePipeline(DiffusionPipeline, LatentDiffusionMixin):
+class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py
index b60dd62140af..37c0ed142c51 100644
--- a/src/diffusers/pipelines/pia/pipeline_pia.py
+++ b/src/diffusers/pipelines/pia/pipeline_pia.py
@@ -46,7 +46,7 @@
)
from ...utils.torch_utils import randn_tensor
from ..free_init_utils import FreeInitMixin
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -212,7 +212,7 @@ class PIAPipelineOutput(BaseOutput):
class PIAPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index 3a4b22064be2..d472687bb915 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -2098,7 +2098,7 @@ def set_attention_slice(self, slice_size: Optional[int]):
module.set_attention_slice(slice_size)
-class LatentDiffusionMixin:
+class StableDiffusionMixin:
r"""
Helper for DiffusionPipeline with vae and unet.(mainly for LDM such as stable diffusion)
"""
diff --git a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
index 9e07a1fa1c8e..f0e25264ffa7 100644
--- a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
+++ b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
@@ -11,14 +11,14 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import SemanticStableDiffusionPipelineOutput
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class SemanticStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with latent editing.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index c7abaeebe177..762565ea1fd3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -116,7 +116,7 @@ def retrieve_timesteps(
class StableDiffusionPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
LoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
index 1e91063a1228..1333cb825750 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
@@ -26,7 +26,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -34,7 +34,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionImageVariationPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline to generate image variations from an input image using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
index 904e3c8c2c2a..e79a053b7662 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -156,7 +156,7 @@ def retrieve_timesteps(
class StableDiffusionImg2ImgPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index 1ea5fa6a0670..62e289c7ba36 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -29,7 +29,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -220,7 +220,7 @@ def retrieve_timesteps(
class StableDiffusionInpaintPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
index 2d8ed42935a3..89d4278937fe 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
@@ -26,7 +26,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import PIL_INTERPOLATION, deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import StableDiffusionPipelineOutput
from .safety_checker import StableDiffusionSafetyChecker
@@ -73,7 +73,7 @@ def retrieve_latents(
class StableDiffusionInstructPix2PixPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
):
r"""
Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion).
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
index 6ad27084f8a8..918dffe5199d 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
@@ -27,7 +27,7 @@
from ...schedulers import EulerDiscreteScheduler
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@@ -60,7 +60,7 @@ def preprocess(image):
return image
-class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin):
+class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin):
r"""
Pipeline for upscaling Stable Diffusion output image resolution by a factor of 2.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index 6b0be8b5a8c1..2d04cf41d9b5 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -34,7 +34,7 @@
from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import StableDiffusionPipelineOutput
@@ -68,7 +68,7 @@ def preprocess(image):
class StableDiffusionUpscalePipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
r"""
Pipeline for text-guided image super-resolution using Stable Diffusion 2.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
index 821dbd2b6318..c62e0f4ec50f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin
from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -58,7 +58,7 @@
"""
-class StableUnCLIPPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
"""
Pipeline for text-to-image generation using stable unCLIP.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
index fd1403c8f8f6..9b85d9e6b1a4 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin
from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -70,7 +70,7 @@
class StableUnCLIPImg2ImgPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
"""
Pipeline for text-guided image-to-image generation using stable unCLIP.
diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
index 84796d936bdc..03c80b46b806 100644
--- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
+++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -170,7 +170,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
return hidden_states
-class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin):
+class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite.
diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
index ad136b2b3993..4c90ce0646c4 100644
--- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
+++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
@@ -39,7 +39,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -236,7 +236,7 @@ def preprocess_mask(mask, batch_size: int = 1):
class StableDiffusionDiffEditPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
index 99b5fc35cf1a..9f0d1190fd87 100644
--- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
+++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
@@ -35,7 +35,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -99,7 +99,7 @@
"""
-class StableDiffusionGLIGENPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
index 77ccbdf55ca8..bbffaf2884a3 100644
--- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
+++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
@@ -34,7 +34,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.clip_image_project_model import CLIPImageProjection
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -145,7 +145,7 @@
"""
-class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
index 5a3e570cbdaf..bc565c938a30 100755
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
@@ -26,7 +26,7 @@
from ...schedulers import LMSDiscreteScheduler
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
@@ -48,7 +48,7 @@ def apply_model(self, *args, **kwargs):
class StableDiffusionKDiffusionPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
index 6925e4eeb1a4..ed46a1e36b60 100644
--- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
@@ -50,7 +50,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -91,7 +91,7 @@ def apply_model(self, *args, **kwargs):
class StableDiffusionXLKDiffusionPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
index a456ea747411..ddbf9ebbb1d0 100644
--- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
+++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py
@@ -36,7 +36,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -83,7 +83,7 @@ class LDM3DPipelineOutput(BaseOutput):
class StableDiffusionLDM3DPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
index 22f4ae29c991..57ca56f9afd8 100644
--- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
+++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py
@@ -32,7 +32,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -60,7 +60,7 @@
class StableDiffusionPanoramaPipeline(
- DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
):
r"""
Pipeline for text-to-image generation using MultiDiffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
index edd3df1506ca..24c648a813ba 100644
--- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
+++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
@@ -14,7 +14,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import deprecate, logging
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import StableDiffusionSafePipelineOutput
from .safety_checker import SafeStableDiffusionSafetyChecker
@@ -22,7 +22,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-class StableDiffusionPipelineSafe(DiffusionPipeline, LatentDiffusionMixin, IPAdapterMixin):
+class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAdapterMixin):
r"""
Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
index 8c44849840f9..878a3fdac211 100644
--- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
+++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py
@@ -33,7 +33,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionPipelineOutput
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -98,7 +98,7 @@ def __call__(
# Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input
-class StableDiffusionSAGPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin):
+class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
index e7d1d28072f6..5e95535ef50e 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
@@ -52,7 +52,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -148,7 +148,7 @@ def retrieve_timesteps(
class StableDiffusionXLPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
index a1034caf4398..eb5d9c64538a 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
@@ -52,7 +52,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -165,7 +165,7 @@ def retrieve_timesteps(
class StableDiffusionXLImg2ImgPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
index 43b397fb18a7..61b9f7ed2fbd 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@@ -53,7 +53,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -310,7 +310,7 @@ def retrieve_timesteps(
class StableDiffusionXLInpaintPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
FromSingleFileMixin,
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
index de11d8d8749f..b3327996263a 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
@@ -41,7 +41,7 @@
scale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .pipeline_output import StableDiffusionXLPipelineOutput
@@ -119,7 +119,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLInstructPix2PixPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
FromSingleFileMixin,
StableDiffusionXLLoraLoaderMixin,
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
index a6575886594b..0b55bb38b5eb 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -163,7 +163,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
-class StableDiffusionAdapterPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
https://arxiv.org/abs/2302.08453
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index 16dd4180c2c2..96c7c6857c05 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -51,7 +51,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -181,7 +181,7 @@ def retrieve_timesteps(
class StableDiffusionXLAdapterPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
IPAdapterMixin,
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
index 005984f8605a..0ed0765703f2 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@@ -33,7 +33,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import TextToVideoSDPipelineOutput
@@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type:
return outputs
-class TextToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class TextToVideoSDPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-to-video generation.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
index 8ac6507ee717..40c486316e13 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@@ -34,7 +34,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from . import TextToVideoSDPipelineOutput
@@ -157,7 +157,7 @@ def preprocess_video(video):
return video
-class VideoToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class VideoToVideoSDPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for text-guided video-to-video generation.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
index 76da107e8967..408ae23f4d9f 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
@@ -17,7 +17,7 @@
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ..stable_diffusion import StableDiffusionSafetyChecker
@@ -281,7 +281,7 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
return warped_latents
-class TextToVideoZeroPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
+class TextToVideoZeroPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin):
r"""
Pipeline for zero-shot text-to-video generation using Stable Diffusion.
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
index c659202838b2..eaa2760363a9 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
@@ -37,7 +37,7 @@
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
if is_invisible_watermark_available():
@@ -327,7 +327,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class TextToVideoZeroSDXLPipeline(
DiffusionPipeline,
- LatentDiffusionMixin,
+ StableDiffusionMixin,
StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin,
):
diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
index 2c09bb0fad5f..bacc1c40abcc 100644
--- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
+++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
@@ -21,7 +21,7 @@
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.outputs import BaseOutput
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from .modeling_text_decoder import UniDiffuserTextDecoder
from .modeling_uvit import UniDiffuserModel
@@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput):
text: Optional[Union[List[str], List[List[str]]]]
-class UniDiffuserPipeline(DiffusionPipeline, LatentDiffusionMixin):
+class UniDiffuserPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
image generation, image-conditioned text generation, and joint image-text generation.
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py
index a87b27d2c479..5a87f26fcc94 100644
--- a/src/diffusers/utils/dummy_pt_objects.py
+++ b/src/diffusers/utils/dummy_pt_objects.py
@@ -570,7 +570,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class LatentDiffusionMixin(metaclass=DummyObject):
+class ImagePipelineOutput(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -585,7 +585,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class ImagePipelineOutput(metaclass=DummyObject):
+class KarrasVePipeline(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -600,7 +600,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class KarrasVePipeline(metaclass=DummyObject):
+class LDMPipeline(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -615,7 +615,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class LDMPipeline(metaclass=DummyObject):
+class LDMSuperResolutionPipeline(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -630,7 +630,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class LDMSuperResolutionPipeline(metaclass=DummyObject):
+class PNDMPipeline(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -645,7 +645,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class PNDMPipeline(metaclass=DummyObject):
+class RePaintPipeline(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -660,7 +660,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class RePaintPipeline(metaclass=DummyObject):
+class ScoreSdeVePipeline(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
@@ -675,7 +675,7 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
-class ScoreSdeVePipeline(metaclass=DummyObject):
+class StableDiffusionMixin(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
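
The cascade of one-line renames above is dummy_pt_objects.py being regenerated: the generated file lists names in a fixed sequence, so removing LatentDiffusionMixin and adding StableDiffusionMixin later in the sequence shifts every intermediate entry by one slot. Each entry follows the same placeholder pattern: the name imports cleanly without torch, but any use raises. A simplified sketch of the idea (not the library's exact implementation):

    class DummyObject(type):
        # Metaclass for placeholders: touching the class raises a helpful error
        # instead of failing opaquely at import time when torch is missing.
        def __getattr__(cls, name):
            raise ImportError(f"{cls.__name__} requires the torch backend to be installed.")


    class StableDiffusionMixin(metaclass=DummyObject):
        _backends = ["torch"]

        def __init__(self, *args, **kwargs):
            raise ImportError("StableDiffusionMixin requires the torch backend to be installed.")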
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
index 89320eb2b936..7aef098916ca 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
@@ -53,10 +53,10 @@
TEXT_TO_IMAGE_PARAMS,
)
from ..test_pipelines_common import (
- LDMFunctionTesterMixin,
PipelineKarrasSchedulerTesterMixin,
PipelineLatentTesterMixin,
PipelineTesterMixin,
+ SDFunctionTesterMixin,
)
@@ -64,7 +64,7 @@
class StableDiffusion2PipelineFastTests(
- LDMFunctionTesterMixin,
+ SDFunctionTesterMixin,
PipelineLatentTesterMixin,
PipelineKarrasSchedulerTesterMixin,
PipelineTesterMixin,
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
index b9327b9d3ce1..82eedac84ca3 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
@@ -50,9 +50,9 @@
TEXT_TO_IMAGE_PARAMS,
)
from ..test_pipelines_common import (
- LDMFunctionTesterMixin,
PipelineLatentTesterMixin,
PipelineTesterMixin,
+ SDFunctionTesterMixin,
SDXLOptionalComponentsTesterMixin,
)
@@ -61,7 +61,7 @@
class StableDiffusionXLPipelineFastTests(
- LDMFunctionTesterMixin,
+ SDFunctionTesterMixin,
PipelineLatentTesterMixin,
PipelineTesterMixin,
SDXLOptionalComponentsTesterMixin,
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index f14b4e98eae7..333137cc7861 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -29,7 +29,7 @@
UNet2DConditionModel,
)
from diffusers.image_processor import VaeImageProcessor
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import logging
from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
@@ -60,10 +60,10 @@ def check_same_shape(tensor_list):
return all(shape == shapes[0] for shape in shapes[1:])
-class LDMFunctionTesterMixin:
+class SDFunctionTesterMixin:
"""
This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
- It provides a set of common tests for PyTorch pipeline that inherit from LatentDiffusionMixin, e.g. vae_slicing, vae_tiling, freeu, etc.
+ It provides a set of common tests for PyTorch pipelines that inherit from StableDiffusionMixin, e.g. vae_slicing, vae_tiling, freeu, etc.
"""
def test_vae_slicing(self):
@@ -94,7 +94,8 @@ def test_vae_tiling(self):
components = self.get_dummy_components()
# make sure here that pndm scheduler skips prk
- components["safety_checker"] = None
+ if "safety_checker" in components:
+ components["safety_checker"] = None
pipe = self.pipeline_class(**components)
pipe = pipe.to(device)
pipe.set_progress_bar_config(disable=None)
@@ -1155,13 +1156,14 @@ def callback_increase_guidance(pipe, i, t, callback_kwargs):
def test_LDM_component(self):
"""Any pipeline that have LDMFuncMixin should have vae and unet components."""
- if not issubclass(self.pipeline_class, LatentDiffusionMixin):
+ if not issubclass(self.pipeline_class, StableDiffusionMixin):
return
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
self.assertTrue(hasattr(pipe, "vae"))
self.assertTrue(hasattr(pipe, "unet"))
+
@is_staging_test
class PipelinePushToHubTester(unittest.TestCase):
identifier = uuid.uuid4()
From ebfd3a77faa811ce275896346073cd94fec55121 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Fri, 23 Feb 2024 16:05:59 +0800
Subject: [PATCH 09/17] Add SDFunctionTesterMixin to more pipeline tests to cover different UNet types
---
tests/pipelines/animatediff/test_animatediff.py | 6 ++++--
tests/pipelines/i2vgen_xl/test_i2vgenxl.py | 4 ++--
tests/pipelines/test_pipelines_common.py | 10 ++++++++--
.../text_to_video_synthesis/test_text_to_video.py | 4 ++--
4 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/tests/pipelines/animatediff/test_animatediff.py b/tests/pipelines/animatediff/test_animatediff.py
index 3b789e4ff0f3..288f856dc677 100644
--- a/tests/pipelines/animatediff/test_animatediff.py
+++ b/tests/pipelines/animatediff/test_animatediff.py
@@ -18,7 +18,7 @@
from diffusers.utils.testing_utils import numpy_cosine_similarity_distance, require_torch_gpu, slow, torch_device
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
-from ..test_pipelines_common import IPAdapterTesterMixin, PipelineTesterMixin
+from ..test_pipelines_common import IPAdapterTesterMixin, PipelineTesterMixin, SDFunctionTesterMixin
def to_np(tensor):
@@ -28,7 +28,9 @@ def to_np(tensor):
return tensor
-class AnimateDiffPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, unittest.TestCase):
+class AnimateDiffPipelineFastTests(
+ IPAdapterTesterMixin, SDFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase
+):
pipeline_class = AnimateDiffPipeline
params = TEXT_TO_IMAGE_PARAMS
batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
diff --git a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py
index 004b06f160bd..aeda67174ad5 100644
--- a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py
+++ b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py
@@ -46,14 +46,14 @@
torch_device,
)
-from ..test_pipelines_common import PipelineTesterMixin
+from ..test_pipelines_common import PipelineTesterMixin, SDFunctionTesterMixin
enable_full_determinism()
@skip_mps
-class I2VGenXLPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+class I2VGenXLPipelineFastTests(SDFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase):
pipeline_class = I2VGenXLPipeline
params = frozenset(["prompt", "negative_prompt", "image"])
batch_params = frozenset(["prompt", "negative_prompt", "image", "generator"])
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 41d14dd6c7dd..6c0ff7d9a9fd 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -30,6 +30,9 @@
)
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import IPAdapterMixin
+from diffusers.models.unets.unet_3d_condition import UNet3DConditionModel
+from diffusers.models.unets.unet_i2vgen_xl import I2VGenXLUNet
+from diffusers.models.unets.unet_motion_model import UNetMotionModel
from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import logging
@@ -1274,8 +1277,11 @@ def test_LDM_component(self):
return
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
- self.assertTrue(hasattr(pipe, "vae"))
- self.assertTrue(hasattr(pipe, "unet"))
+ self.assertTrue(hasattr(pipe, "vae") and isinstance(self.pipe.vae, (AutoencoderKL, AutoencoderTiny)))
+ self.assertTrue(
+ hasattr(pipe, "unet")
+ and isinstance(pipe.unet, (UNet2DConditionModel, UNet3DConditionModel, I2VGenXLUNet, UNetMotionModel))
+ )
@is_staging_test
diff --git a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
index d988350505a8..9dc48011d2f1 100644
--- a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
+++ b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
@@ -37,14 +37,14 @@
)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
-from ..test_pipelines_common import PipelineTesterMixin
+from ..test_pipelines_common import PipelineTesterMixin, SDFunctionTesterMixin
enable_full_determinism()
@skip_mps
-class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin, unittest.TestCase):
pipeline_class = TextToVideoSDPipeline
params = TEXT_TO_IMAGE_PARAMS
batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
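
Composing the tester into a fast-test class only requires listing it alongside the other mixins before unittest.TestCase, as the diffs above do for AnimateDiff, I2VGenXL, and TextToVideoSD. A sketch of the resulting shape, as it would appear inside tests/pipelines:

    import unittest

    from diffusers import TextToVideoSDPipeline

    from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
    from ..test_pipelines_common import PipelineTesterMixin, SDFunctionTesterMixin


    class ExampleFastTests(SDFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase):
        # SDFunctionTesterMixin contributes the vae slicing/tiling, FreeU, and
        # fused-QKV tests; its component test no-ops for pipelines that do not
        # subclass StableDiffusionMixin.
        pipeline_class = TextToVideoSDPipeline
        params = TEXT_TO_IMAGE_PARAMS
        batch_params = TEXT_TO_IMAGE_BATCH_PARAMS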
From 066c14d227d2cfcb1e92b6475f217c97456eee64 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Fri, 23 Feb 2024 16:06:17 +0800
Subject: [PATCH 10/17] add StableDiffusionMixin to InstaFlowPipeline
---
examples/community/instaflow_one_step.py | 35 +++---------------------
1 file changed, 4 insertions(+), 31 deletions(-)
diff --git a/examples/community/instaflow_one_step.py b/examples/community/instaflow_one_step.py
index 065abfe13d23..b07d85f8fcdf 100644
--- a/examples/community/instaflow_one_step.py
+++ b/examples/community/instaflow_one_step.py
@@ -24,7 +24,7 @@
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -52,7 +52,9 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
return noise_cfg
-class InstaFlowPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin):
+class InstaFlowPipeline(
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+):
r"""
Pipeline for text-to-image generation using Rectified Flow and Euler discretization.
This customized pipeline is based on StableDiffusionPipeline from the official Diffusers library (0.21.4)
@@ -180,35 +182,6 @@ def __init__(
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker)
- def enable_vae_slicing(self):
- r"""
- Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
- compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
- """
- self.vae.enable_slicing()
-
- def disable_vae_slicing(self):
- r"""
- Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_slicing()
-
- def enable_vae_tiling(self):
- r"""
- Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
- compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
- processing larger images.
- """
- self.vae.enable_tiling()
-
- def disable_vae_tiling(self):
- r"""
- Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
- computing decoding in one step.
- """
- self.vae.disable_tiling()
-
def _encode_prompt(
self,
prompt,
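
With the duplicated helpers deleted, InstaFlowPipeline picks the same four VAE methods up through inheritance, so existing callers are unaffected. A quick sanity check, assuming the repository root is on sys.path (community pipelines are not shipped as a package):

    from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
    from examples.community.instaflow_one_step import InstaFlowPipeline

    assert issubclass(InstaFlowPipeline, StableDiffusionMixin)
    assert "enable_vae_slicing" not in vars(InstaFlowPipeline)  # no local copy left
    assert hasattr(InstaFlowPipeline, "enable_vae_slicing")     # inherited from the mixin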
From fdc43c5475e2c495bf53b4491ff812ce72146fac Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Fri, 23 Feb 2024 16:06:46 +0800
Subject: [PATCH 11/17] remove StableDiffusionMixin from UniDiffuserPipeline
---
.../unidiffuser/pipeline_unidiffuser.py | 37 ++++++++++++++++++-
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
index bacc1c40abcc..5d61b1054e1c 100644
--- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
+++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
@@ -21,7 +21,7 @@
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils.outputs import BaseOutput
from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DiffusionPipeline
from .modeling_text_decoder import UniDiffuserTextDecoder
from .modeling_uvit import UniDiffuserModel
@@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput):
text: Optional[Union[List[str], List[List[str]]]]
-class UniDiffuserPipeline(DiffusionPipeline, StableDiffusionMixin):
+class UniDiffuserPipeline(DiffusionPipeline):
r"""
Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
image generation, image-conditioned text generation, and joint image-text generation.
@@ -211,6 +211,39 @@ def _infer_mode(self, prompt, prompt_embeds, image, latents, prompt_latents, vae
return mode
+ # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_slicing
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
+ # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_tiling
+ def enable_vae_tiling(self):
+ r"""
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+ compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
+ processing larger images.
+ """
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_tiling
+ def disable_vae_tiling(self):
+ r"""
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_tiling()
+
# Functions to manually set the mode
def set_text_mode(self):
r"""Manually set the generation mode to unconditional ("marginal") text generation."""
From 009914468207430b13a025a100702287ca31c594 Mon Sep 17 00:00:00 2001
From: YiYi Xu
Date: Fri, 23 Feb 2024 15:32:20 -1000
Subject: [PATCH 12/17] Update tests/pipelines/test_pipelines_common.py
---
tests/pipelines/test_pipelines_common.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 6c0ff7d9a9fd..428cb0750088 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -1277,7 +1277,7 @@ def test_LDM_component(self):
return
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
- self.assertTrue(hasattr(pipe, "vae") and isinstance(self.pipe.vae, (AutoencoderKL, AutoencoderTiny)))
+ self.assertTrue(hasattr(pipe, "vae") and isinstance(pipe.vae, (AutoencoderKL, AutoencoderTiny)))
self.assertTrue(
hasattr(pipe, "unet")
and isinstance(pipe.unet, (UNet2DConditionModel, UNet3DConditionModel, I2VGenXLUNet, UNetMotionModel))
From 4a294ecf8556465f12b05d235be148f7eb7d7015 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Sun, 25 Feb 2024 12:03:06 +0800
Subject: [PATCH 13/17] make SDFunctionTesterMixin run on non-image diffusion pipelines
---
tests/pipelines/test_pipelines_common.py | 33 +++++++++++++-----------
1 file changed, 18 insertions(+), 15 deletions(-)
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 428cb0750088..95ef904095ee 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -89,10 +89,10 @@ def test_vae_slicing(self):
pipe.enable_vae_slicing()
inputs = self.get_dummy_inputs(device)
inputs["prompt"] = [inputs["prompt"]] * image_count
+ inputs["return_dict"] = False
output_2 = pipe(**inputs)
- # there is a small discrepancy at image borders vs. full batch decode
- assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 3e-3
+ assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 3e-3
def test_vae_tiling(self):
device = "cpu" # ensure determinism for the device-dependent torch.Generator
@@ -109,14 +109,14 @@ def test_vae_tiling(self):
# Test that tiled decode at 512x512 yields the same result as the non-tiled decode
generator = torch.Generator(device=device).manual_seed(0)
- output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
+ output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False)
# make sure tiled vae decode yields the same result
pipe.enable_vae_tiling()
generator = torch.Generator(device=device).manual_seed(0)
- output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
+ output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False)
- assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 5e-1
+ assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 5e-1
# test that tiled decode works with various shapes
shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)]
@@ -131,10 +131,10 @@ def test_freeu_enabled(self):
pipe.set_progress_bar_config(disable=None)
prompt = "hey"
- output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+ output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
- output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+ output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
assert not np.allclose(
output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1]
@@ -147,7 +147,7 @@ def test_freeu_disabled(self):
pipe.set_progress_bar_config(disable=None)
prompt = "hey"
- output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+ output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
pipe.disable_freeu()
@@ -157,7 +157,7 @@ def test_freeu_disabled(self):
for key in freeu_keys:
assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None."
- output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+ output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
assert np.allclose(
output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]
@@ -171,18 +171,21 @@ def test_fused_qkv_projections(self):
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(device)
- image = pipe(**inputs).images
+ inputs["return_dict"] = False
+ image = pipe(**inputs)[0]
original_image_slice = image[0, -3:, -3:, -1]
pipe.fuse_qkv_projections()
inputs = self.get_dummy_inputs(device)
- image = pipe(**inputs).images
- image_slice_fused = image[0, -3:, -3:, -1]
+ inputs["return_dict"] = False
+ image_fused = pipe(**inputs)[0]
+ image_slice_fused = image_fused[0, -3:, -3:, -1]
pipe.unfuse_qkv_projections()
inputs = self.get_dummy_inputs(device)
- image = pipe(**inputs).images
- image_slice_disabled = image[0, -3:, -3:, -1]
+ inputs["return_dict"] = False
+ image_disabled = pipe(**inputs)[0]
+ image_slice_disabled = image_disabled[0, -3:, -3:, -1]
assert np.allclose(
original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2
@@ -1271,7 +1274,7 @@ def callback_increase_guidance(pipe, i, t, callback_kwargs):
# accounts for models that modify the number of inference steps based on strength
assert pipe.guidance_scale == (inputs["guidance_scale"] + pipe.num_timesteps)
- def test_LDM_component(self):
+ def test_StableDiffusionMixin_component(self):
"""Any pipeline that have LDMFuncMixin should have vae and unet components."""
if not issubclass(self.pipeline_class, StableDiffusionMixin):
return
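
The switch to `return_dict=False` throughout is what lets one tester cover image and video pipelines alike: with a dataclass output the generated tensor sits behind a pipeline-specific field (`.images` for image pipelines, `.frames` for video ones), while the tuple form always puts it at index 0. A sketch of the contract, where `pipe` and `inputs` stand for the pipeline and dummy inputs under test:

    # Dataclass output: the field name depends on the pipeline family.
    out = pipe(**inputs)             # e.g. out.images or out.frames

    # Tuple output: uniform across families, so shared tests can index it.
    out = pipe(**inputs, return_dict=False)
    generated = out[0]               # images or frames, always element 0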
From b3c3de04feac417c6011eb7424422e5906813373 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Sun, 25 Feb 2024 12:03:18 +0800
Subject: [PATCH 14/17] fix fuse_projections by checking is_cross_attention at init
---
src/diffusers/models/attention_processor.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index 1c008264ba33..3d973608934a 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -116,6 +116,8 @@ def __init__(
super().__init__()
self.inner_dim = out_dim if out_dim is not None else dim_head * heads
self.query_dim = query_dim
+ self.use_bias = bias
+ self.is_cross_attention = cross_attention_dim is not None
self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim
self.upcast_attention = upcast_attention
self.upcast_softmax = upcast_softmax
@@ -693,27 +695,32 @@ def norm_encoder_hidden_states(self, encoder_hidden_states: torch.Tensor) -> tor
@torch.no_grad()
def fuse_projections(self, fuse=True):
- is_cross_attention = self.cross_attention_dim != self.query_dim
device = self.to_q.weight.data.device
dtype = self.to_q.weight.data.dtype
- if not is_cross_attention:
+ if not self.is_cross_attention:
# fetch weight matrices.
concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data])
in_features = concatenated_weights.shape[1]
out_features = concatenated_weights.shape[0]
# create a new single projection layer and copy over the weights.
- self.to_qkv = self.linear_cls(in_features, out_features, bias=False, device=device, dtype=dtype)
+ self.to_qkv = self.linear_cls(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype)
self.to_qkv.weight.copy_(concatenated_weights)
+ if self.use_bias:
+ concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data])
+ self.to_qkv.bias.copy_(concatenated_bias)
else:
concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data])
in_features = concatenated_weights.shape[1]
out_features = concatenated_weights.shape[0]
- self.to_kv = self.linear_cls(in_features, out_features, bias=False, device=device, dtype=dtype)
+ self.to_kv = self.linear_cls(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype)
self.to_kv.weight.copy_(concatenated_weights)
+ if self.use_bias:
+ concatenated_bias = torch.cat([ self.to_k.bias.data, self.to_v.bias.data])
+ self.to_kv.bias.copy_(concatenated_bias)
self.fused_projections = fuse
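The substance of patch 14 is easiest to verify outside the `Attention` class: fusion is only exact when the biases are concatenated along with the weights. A self-contained sketch in plain `torch.nn`, mirroring the patch's naming:

import torch
import torch.nn as nn

torch.manual_seed(0)
dim, use_bias = 32, True  # bias=True is exactly the case the patch fixes

# separate projections, as in an unfused self-attention block
to_q, to_k, to_v = (nn.Linear(dim, dim, bias=use_bias) for _ in range(3))

# fuse: concatenate weights (and biases) along the output dimension
to_qkv = nn.Linear(dim, 3 * dim, bias=use_bias)
with torch.no_grad():
    to_qkv.weight.copy_(torch.cat([to_q.weight, to_k.weight, to_v.weight]))
    to_qkv.bias.copy_(torch.cat([to_q.bias, to_k.bias, to_v.bias]))

x = torch.randn(2, dim)
q, k, v = to_qkv(x).chunk(3, dim=-1)
# with bias=False in the fused layer (the old behavior), these would fail
assert torch.allclose(q, to_q(x), atol=1e-6)
assert torch.allclose(k, to_k(x), atol=1e-6)
assert torch.allclose(v, to_v(x), atol=1e-6)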
From 994299c3113b32b9c8d4199afe511dafd8b7a685 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Tue, 27 Feb 2024 20:52:09 +0800
Subject: [PATCH 15/17] use get_dummy_inputs for test_vae_tiling and test_freeu
---
src/diffusers/models/attention_processor.py | 2 +-
tests/pipelines/test_pipelines_common.py | 32 ++++++++++++---------
2 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index 9d13b0aec945..5ec8876fc114 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -723,7 +723,7 @@ def fuse_projections(self, fuse=True):
self.to_kv = self.linear_cls(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype)
self.to_kv.weight.copy_(concatenated_weights)
if self.use_bias:
- concatenated_bias = torch.cat([ self.to_k.bias.data, self.to_v.bias.data])
+ concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data])
self.to_kv.bias.copy_(concatenated_bias)
self.fused_projections = fuse
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 95ef904095ee..0e3d20baeb8a 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -105,18 +105,19 @@ def test_vae_tiling(self):
pipe = pipe.to(device)
pipe.set_progress_bar_config(disable=None)
- prompt = "A painting of a squirrel eating a burger"
+ inputs = self.get_dummy_inputs(torch_device)
+ inputs["return_dict"] = False
# Test that tiled decode at 512x512 yields the same result as the non-tiled decode
- generator = torch.Generator(device=device).manual_seed(0)
- output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False)
+ output_1 = pipe(**inputs)[0]
# make sure tiled vae decode yields the same result
pipe.enable_vae_tiling()
- generator = torch.Generator(device=device).manual_seed(0)
- output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False)
+ inputs = self.get_dummy_inputs(torch_device)
+ inputs["return_dict"] = False
+ output_2 = pipe(**inputs)[0]
- assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 5e-1
+ assert np.abs(output_2.flatten() - output_1.flatten()).max() < 5e-1
# test that tiled decode works with various shapes
shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)]
@@ -130,11 +131,14 @@ def test_freeu_enabled(self):
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
- prompt = "hey"
- output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
+ inputs = self.get_dummy_inputs(torch_device)
+ inputs["return_dict"] = False
+ output = pipe(**inputs)[0]
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
- output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
+ inputs = self.get_dummy_inputs(torch_device)
+ inputs["return_dict"] = False
+ output_freeu = pipe(**inputs)[0]
assert not np.allclose(
output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1]
@@ -146,8 +150,9 @@ def test_freeu_disabled(self):
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
- prompt = "hey"
- output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
+ inputs = self.get_dummy_inputs(torch_device)
+ inputs["return_dict"] = False
+ output = pipe(**inputs)[0]
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
pipe.disable_freeu()
@@ -157,8 +162,9 @@ def test_freeu_disabled(self):
for key in freeu_keys:
assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None."
- output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
-
+ inputs = self.get_dummy_inputs(torch_device)
+ inputs["return_dict"] = False
+ output_no_freeu = pipe(**inputs)[0]
assert np.allclose(
output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]
), "Disabling of FreeU should lead to results similar to the default pipeline results."
From a076831153a3f28cc9adb1e198cbadf29b72b867 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Tue, 27 Feb 2024 20:52:31 +0800
Subject: [PATCH 16/17] fix I2VGen-XL test error
---
tests/pipelines/test_pipelines_common.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 0e3d20baeb8a..0ca464d3bd13 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -83,16 +83,20 @@ def test_vae_slicing(self):
inputs = self.get_dummy_inputs(device)
inputs["prompt"] = [inputs["prompt"]] * image_count
+ if "image" in inputs: # fix batch size mismatch in I2V_Gen pipeline
+ inputs["image"] = [inputs["image"]] * image_count
output_1 = pipe(**inputs)
# make sure sliced vae decode yields the same result
pipe.enable_vae_slicing()
inputs = self.get_dummy_inputs(device)
inputs["prompt"] = [inputs["prompt"]] * image_count
+ if "image" in inputs:
+ inputs["image"] = [inputs["image"]] * image_count
inputs["return_dict"] = False
output_2 = pipe(**inputs)
- assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 3e-3
+ assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 1e-2
def test_vae_tiling(self):
device = "cpu" # ensure determinism for the device-dependent torch.Generator
@@ -117,7 +121,7 @@ def test_vae_tiling(self):
inputs["return_dict"] = False
output_2 = pipe(**inputs)[0]
- assert np.abs(output_2.flatten() - output_1.flatten()).max() < 5e-1
+ assert np.abs(output_2 - output_1).max() < 5e-1
# test that tiled decode works with various shapes
shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)]
@@ -166,8 +170,8 @@ def test_freeu_disabled(self):
inputs["return_dict"] = False
output_no_freeu = pipe(**inputs)[0]
assert np.allclose(
- output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]
- ), "Disabling of FreeU should lead to results similar to the default pipeline results."
+ output, output_no_freeu, atol=1e-2
+ ), f"Disabling of FreeU should lead to results similar to the default pipeline results but Max Abs Error={np.abs(output_no_freeu - output).max()}."
def test_fused_qkv_projections(self):
device = "cpu" # ensure determinism for the device-dependent torch.Generator
From 0fd684bc391bffe5790f291cc15bad2f5479e986 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Wed, 28 Feb 2024 12:36:00 +0800
Subject: [PATCH 17/17] add missing StableDiffusionMixin
---
.../community/clip_guided_images_mixing_stable_diffusion.py | 4 ++--
examples/community/clip_guided_stable_diffusion.py | 4 ++--
examples/community/clip_guided_stable_diffusion_img2img.py | 4 ++--
examples/community/imagic_stable_diffusion.py | 3 ++-
examples/community/interpolate_stable_diffusion.py | 4 ++--
examples/community/mixture_canvas.py | 4 ++--
examples/community/multilingual_stable_diffusion.py | 4 ++--
examples/community/pipeline_sdxl_style_aligned.py | 2 +-
examples/community/stable_diffusion_controlnet_img2img.py | 5 +++--
examples/community/stable_diffusion_controlnet_inpaint.py | 5 +++--
.../community/stable_diffusion_controlnet_inpaint_img2img.py | 5 +++--
examples/community/wildcard_stable_diffusion.py | 4 ++--
.../pipeline_stable_diffusion_pix2pix_zero.py | 4 ++--
13 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/examples/community/clip_guided_images_mixing_stable_diffusion.py b/examples/community/clip_guided_images_mixing_stable_diffusion.py
index 6fcbb16963b8..16dcecd7b22a 100644
--- a/examples/community/clip_guided_images_mixing_stable_diffusion.py
+++ b/examples/community/clip_guided_images_mixing_stable_diffusion.py
@@ -12,12 +12,12 @@
from diffusers import (
AutoencoderKL,
DDIMScheduler,
- DiffusionPipeline,
DPMSolverMultistepScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
UNet2DConditionModel,
)
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.utils import PIL_INTERPOLATION
from diffusers.utils.torch_utils import randn_tensor
@@ -77,7 +77,7 @@ def set_requires_grad(model, value):
param.requires_grad = value
-class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline):
+class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
def __init__(
self,
vae: AutoencoderKL,
diff --git a/examples/community/clip_guided_stable_diffusion.py b/examples/community/clip_guided_stable_diffusion.py
index 9065462940c2..4205718802de 100644
--- a/examples/community/clip_guided_stable_diffusion.py
+++ b/examples/community/clip_guided_stable_diffusion.py
@@ -10,12 +10,12 @@
from diffusers import (
AutoencoderKL,
DDIMScheduler,
- DiffusionPipeline,
DPMSolverMultistepScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
UNet2DConditionModel,
)
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
@@ -51,7 +51,7 @@ def set_requires_grad(model, value):
param.requires_grad = value
-class CLIPGuidedStableDiffusion(DiffusionPipeline):
+class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
"""CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000
- https://github.com/Jack000/glid-3-xl
- https://github.dev/crowsonkb/k-diffusion
diff --git a/examples/community/clip_guided_stable_diffusion_img2img.py b/examples/community/clip_guided_stable_diffusion_img2img.py
index 83e117f02dd2..434d5253679a 100644
--- a/examples/community/clip_guided_stable_diffusion_img2img.py
+++ b/examples/community/clip_guided_stable_diffusion_img2img.py
@@ -12,12 +12,12 @@
from diffusers import (
AutoencoderKL,
DDIMScheduler,
- DiffusionPipeline,
DPMSolverMultistepScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
UNet2DConditionModel,
)
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.utils import PIL_INTERPOLATION, deprecate
from diffusers.utils.torch_utils import randn_tensor
@@ -125,7 +125,7 @@ def set_requires_grad(model, value):
param.requires_grad = value
-class CLIPGuidedStableDiffusion(DiffusionPipeline):
+class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
"""CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000
- https://github.com/Jack000/glid-3-xl
- https://github.dev/crowsonkb/k-diffusion
diff --git a/examples/community/imagic_stable_diffusion.py b/examples/community/imagic_stable_diffusion.py
index 057d46c4522b..25048e946fe0 100644
--- a/examples/community/imagic_stable_diffusion.py
+++ b/examples/community/imagic_stable_diffusion.py
@@ -19,6 +19,7 @@
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -56,7 +57,7 @@ def preprocess(image):
return 2.0 * image - 1.0
-class ImagicStableDiffusionPipeline(DiffusionPipeline):
+class ImagicStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for imagic image editing.
See paper here: https://arxiv.org/pdf/2210.09276.pdf
diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py
index 4c13e0046b9a..1b859c35f174 100644
--- a/examples/community/interpolate_stable_diffusion.py
+++ b/examples/community/interpolate_stable_diffusion.py
@@ -7,9 +7,9 @@
import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
-from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -46,7 +46,7 @@ def slerp(t, v0, v1, DOT_THRESHOLD=0.9995):
return v2
-class StableDiffusionWalkPipeline(DiffusionPipeline):
+class StableDiffusionWalkPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion.
diff --git a/examples/community/mixture_canvas.py b/examples/community/mixture_canvas.py
index 3737183e5513..2083c7acad38 100644
--- a/examples/community/mixture_canvas.py
+++ b/examples/community/mixture_canvas.py
@@ -12,7 +12,7 @@
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -264,7 +264,7 @@ def _quartic_weights(self, region: DiffusionRegion) -> torch.tensor:
return torch.tile(torch.tensor(weights), (self.nbatch, self.latent_space_dim, 1, 1))
-class StableDiffusionCanvasPipeline(DiffusionPipeline):
+class StableDiffusionCanvasPipeline(DiffusionPipeline, StableDiffusionMixin):
"""Stable Diffusion pipeline that mixes several diffusers in the same canvas"""
def __init__(
diff --git a/examples/community/multilingual_stable_diffusion.py b/examples/community/multilingual_stable_diffusion.py
index 0a3b49a14d7d..f3b0540cf4d3 100644
--- a/examples/community/multilingual_stable_diffusion.py
+++ b/examples/community/multilingual_stable_diffusion.py
@@ -11,9 +11,9 @@
pipeline,
)
-from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -48,7 +48,7 @@ def translate_prompt(prompt, translation_tokenizer, translation_model, device):
return en_trans[0]
-class MultilingualStableDiffusion(DiffusionPipeline):
+class MultilingualStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for text-to-image generation using Stable Diffusion in different languages.
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py
index db19533a3d2c..ec4aa3791557 100644
--- a/examples/community/pipeline_sdxl_style_aligned.py
+++ b/examples/community/pipeline_sdxl_style_aligned.py
@@ -85,7 +85,7 @@
>>> from typing import List
>>> import torch
- >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,StableDiffusionMixin
+ >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline
>>> from PIL import Image
>>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo"
diff --git a/examples/community/stable_diffusion_controlnet_img2img.py b/examples/community/stable_diffusion_controlnet_img2img.py
index f961c767e416..5f9083616a84 100644
--- a/examples/community/stable_diffusion_controlnet_img2img.py
+++ b/examples/community/stable_diffusion_controlnet_img2img.py
@@ -8,8 +8,9 @@
import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
-from diffusers import AutoencoderKL, ControlNetModel, DiffusionPipeline, UNet2DConditionModel, logging
+from diffusers import AutoencoderKL, ControlNetModel, UNet2DConditionModel, logging
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -128,7 +129,7 @@ def prepare_controlnet_conditioning_image(
return controlnet_conditioning_image
-class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
+class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
"""
Inspired by: https://github.com/haofanwang/ControlNet-for-Diffusers/
"""
diff --git a/examples/community/stable_diffusion_controlnet_inpaint.py b/examples/community/stable_diffusion_controlnet_inpaint.py
index 76e6e331abcb..0173ed41bee6 100644
--- a/examples/community/stable_diffusion_controlnet_inpaint.py
+++ b/examples/community/stable_diffusion_controlnet_inpaint.py
@@ -9,8 +9,9 @@
import torch.nn.functional as F
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
-from diffusers import AutoencoderKL, ControlNetModel, DiffusionPipeline, UNet2DConditionModel, logging
+from diffusers import AutoencoderKL, ControlNetModel, UNet2DConditionModel, logging
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -226,7 +227,7 @@ def prepare_controlnet_conditioning_image(
return controlnet_conditioning_image
-class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline):
+class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusionMixin):
"""
Inspired by: https://github.com/haofanwang/ControlNet-for-Diffusers/
"""
diff --git a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
index 34b8170f66c8..d056eb112165 100644
--- a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
+++ b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
@@ -9,7 +9,8 @@
import torch.nn.functional as F
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
-from diffusers import AutoencoderKL, ControlNetModel, DiffusionPipeline, UNet2DConditionModel, logging
+from diffusers import AutoencoderKL, ControlNetModel, UNet2DConditionModel, logging
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
@@ -215,7 +216,7 @@ def prepare_controlnet_conditioning_image(
return controlnet_conditioning_image
-class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline):
+class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
"""
Inspired by: https://github.com/haofanwang/ControlNet-for-Diffusers/
"""
diff --git a/examples/community/wildcard_stable_diffusion.py b/examples/community/wildcard_stable_diffusion.py
index 1a5ea350b857..241e661536d3 100644
--- a/examples/community/wildcard_stable_diffusion.py
+++ b/examples/community/wildcard_stable_diffusion.py
@@ -8,9 +8,9 @@
import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
-from diffusers import DiffusionPipeline
from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -63,7 +63,7 @@ class WildcardStableDiffusionOutput(StableDiffusionPipelineOutput):
prompts: List[str]
-class WildcardStableDiffusionPipeline(DiffusionPipeline):
+class WildcardStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Example Usage:
pipe = WildcardStableDiffusionPipeline.from_pretrained(
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
index c2e2369f27f8..c819e5728181 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
@@ -46,7 +46,7 @@
unscale_lora_layers,
)
from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -280,7 +280,7 @@ def __call__(
return hidden_states
-class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
+class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin):
r"""
Pipeline for pixel-level image editing using Pix2Pix Zero. Based on Stable Diffusion.
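Taken together, patch 17 gives every community pipeline above the same memory/efficiency surface. A minimal usage sketch, assuming the mixin's helpers as exercised by the tests; the pipeline class and checkpoint below are hypothetical:

from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin

class MyCommunityPipeline(DiffusionPipeline, StableDiffusionMixin):
    """Hypothetical pipeline; inheriting the mixin adds the efficiency helpers."""

    def __init__(self, vae: AutoencoderKL, unet: UNet2DConditionModel, scheduler):
        super().__init__()
        self.register_modules(vae=vae, unet=unet, scheduler=scheduler)

# pipe = MyCommunityPipeline.from_pretrained("some/checkpoint")  # hypothetical
# pipe.enable_vae_slicing()                           # sliced VAE decode
# pipe.enable_vae_tiling()                            # tiled VAE decode for large images
# pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)   # FreeU scaling factors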