From 4f1e3787e3e17f497428972134a1983d19cc1cf5 Mon Sep 17 00:00:00 2001 From: 1lint <105617163+1lint@users.noreply.github.com> Date: Tue, 28 Mar 2023 12:00:28 -0700 Subject: [PATCH 01/11] add mixin class for pipeline from original sd ckpt --- .../stable_diffusion/convert_from_ckpt.py | 133 +++++++++++++++--- .../pipeline_stable_diffusion.py | 3 +- .../pipeline_stable_diffusion_img2img.py | 3 +- ...ipeline_stable_diffusion_inpaint_legacy.py | 3 +- tests/test_pipelines.py | 21 +++ 5 files changed, 140 insertions(+), 23 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py index a16213639526..e1e7d0049ac6 100644 --- a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +++ b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py @@ -17,6 +17,7 @@ import re from io import BytesIO from typing import Optional +from pathlib import Path import requests import torch @@ -31,32 +32,36 @@ CLIPVisionModelWithProjection, ) -from diffusers import ( +from huggingface_hub import hf_hub_download + +from ...models import ( AutoencoderKL, + UNet2DConditionModel, + PriorTransformer, ControlNetModel, +) + +from ...schedulers import ( DDIMScheduler, DDPMScheduler, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, HeunDiscreteScheduler, - LDMTextToImagePipeline, LMSDiscreteScheduler, PNDMScheduler, - PriorTransformer, - StableDiffusionControlNetPipeline, - StableDiffusionPipeline, - StableUnCLIPImg2ImgPipeline, - StableUnCLIPPipeline, UnCLIPScheduler, - UNet2DConditionModel, ) -from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel -from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline -from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker -from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import 
StableUnCLIPImageNormalizer -from ...utils import is_omegaconf_available, is_safetensors_available, logging +from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel +from ..paint_by_example import PaintByExampleImageEncoder +from .safety_checker import StableDiffusionSafetyChecker + +from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer + +from ..pipeline_utils import DiffusionPipeline + +from ...utils import is_omegaconf_available, is_safetensors_available, logging, DIFFUSERS_CACHE, HF_HUB_OFFLINE from ...utils.import_utils import BACKENDS_MAPPING @@ -990,7 +995,8 @@ def download_from_original_stable_diffusion_ckpt( clip_stats_path: Optional[str] = None, controlnet: Optional[bool] = None, load_safety_checker: bool = True, -) -> StableDiffusionPipeline: + pipeline_class: DiffusionPipeline = None, +) -> DiffusionPipeline: """ Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml` config file. @@ -1026,12 +1032,29 @@ def download_from_original_stable_diffusion_ckpt( Whether the attention computation should always be upcasted. This is necessary when running stable diffusion 2.1. device (`str`, *optional*, defaults to `None`): - The device to use. Pass `None` to determine automatically. :param from_safetensors: If `checkpoint_path` is - in `safetensors` format, load checkpoint with safetensors instead of PyTorch. :return: A - StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file. + The device to use. Pass `None` to determine automatically. + from_safetensors (`str`, *optional*, defaults to `False`): + If `checkpoint_path` is in `safetensors` format, load checkpoint with safetensors instead of PyTorch. load_safety_checker (`bool`, *optional*, defaults to `True`): Whether to load the safety checker or not. Defaults to `True`. + pipeline_class (`str`, *optional*, defaults to `None`): + The pipeline class to use. 
Pass `None` to determine automatically. + return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file. """ + + # import pipelines here to avoid circular import error when using from_ckpt method + from diffusers import ( + StableDiffusionControlNetPipeline, + StableDiffusionPipeline, + StableUnCLIPImg2ImgPipeline, + StableUnCLIPPipeline, + LDMTextToImagePipeline, + PaintByExamplePipeline, + ) + + if pipeline_class is None: + pipeline_class = StableDiffusionPipeline + if prediction_type == "v-prediction": prediction_type = "v_prediction" @@ -1193,7 +1216,7 @@ def download_from_original_stable_diffusion_ckpt( requires_safety_checker=False, ) else: - pipe = StableDiffusionPipeline( + pipe = pipeline_class( vae=vae, text_encoder=text_model, tokenizer=tokenizer, @@ -1293,7 +1316,7 @@ def download_from_original_stable_diffusion_ckpt( feature_extractor=feature_extractor, ) else: - pipe = StableDiffusionPipeline( + pipe = pipeline_class( vae=vae, text_encoder=text_model, tokenizer=tokenizer, @@ -1320,7 +1343,7 @@ def download_controlnet_from_original_ckpt( upcast_attention: Optional[bool] = None, device: str = None, from_safetensors: bool = False, -) -> StableDiffusionPipeline: +) -> DiffusionPipeline: if not is_omegaconf_available(): raise ValueError(BACKENDS_MAPPING["omegaconf"][1]) @@ -1361,3 +1384,73 @@ def download_controlnet_from_original_ckpt( ) return controlnet_model + + +class FromCkptMixin: + @classmethod + def from_ckpt(cls, model_path_or_checkpoint, **kwargs): + pipeline_name = cls.__name__ + + file_extension = model_path_or_checkpoint.rsplit(".", 1)[-1] + from_safetensors = file_extension == "safetensors" + + stable_unclip = None + controlnet = False + if pipeline_name == "StableDiffusionControlNetPipeline": + model_type = "FrozenCLIPEmbedder" + controlnet = True + + elif "StableDiffusion" in pipeline_name: + model_type = "FrozenCLIPEmbedder" + + elif pipeline_name == "StableUnCLIPPipeline": + model_type == 
"FrozenOpenCLIPEmbedder" + stable_unclip = "txt2img" + + elif pipeline_name == "StableUnCLIPImg2ImgPipeline": + model_type == "FrozenOpenCLIPEmbedder" + stable_unclip = "img2img" + + elif pipeline_name == "PaintByExamplePipeline": + model_type == "PaintByExample" + + elif pipeline_name == "LDMTextToImagePipeline": + model_type == "LDMTextToImage" + + else: + raise ValueError(f"Unhandled pipeline class: {pipeline_name}") + + # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained + ckpt_path = Path(model_path_or_checkpoint) + if not ckpt_path.is_file(): + # get repo_id and (potentially nested) file path of ckpt in repo + repo_id = str(Path().joinpath(*ckpt_path.parts[:2])) + file_path = str(Path().joinpath(*ckpt_path.parts[2:])) + + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + resume_download = kwargs.pop("resume_download", False) + local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) + revision = kwargs.pop("revision", None) + proxies = kwargs.pop("proxies", None) + use_auth_token = kwargs.pop("use_auth_token", None) + + model_path_or_checkpoint = hf_hub_download( + repo_id, + filename=file_path, + cache_dir=cache_dir, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + ) + + return download_from_original_stable_diffusion_ckpt( + model_path_or_checkpoint, + pipeline_class=cls, + model_type=model_type, + stable_unclip=stable_unclip, + controlnet=controlnet, + from_safetensors=from_safetensors, + **kwargs, + ) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index b428b4341849..ff5afcfaf008 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -34,6 +34,7 @@ from . 
import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker +from .convert_from_ckpt import FromCkptMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -52,7 +53,7 @@ """ -class StableDiffusionPipeline(DiffusionPipeline): +class StableDiffusionPipeline(DiffusionPipeline, FromCkptMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 14512e180992..e6043780a742 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -38,6 +38,7 @@ from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker +from .convert_from_ckpt import FromCkptMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -91,7 +92,7 @@ def preprocess(image): return image -class StableDiffusionImg2ImgPipeline(DiffusionPipeline): +class StableDiffusionImg2ImgPipeline(DiffusionPipeline, FromCkptMixin): r""" Pipeline for text-guided image to image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index feb13d100089..5db05257f5b6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -36,6 +36,7 @@ from . 
import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker +from .convert_from_ckpt import FromCkptMixin logger = logging.get_logger(__name__) @@ -81,7 +82,7 @@ def preprocess_mask(mask, scale_factor=8): return mask -class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): +class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline, FromCkptMixin): r""" Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 2616223c5447..32394ac9789f 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -362,6 +362,27 @@ def test_download_broken_variant(self): diffusers.utils.import_utils._safetensors_available = True + def test_download_from_ckpt(self): + with tempfile.TemporaryDirectory() as tmpdirname: + ckpt_paths = [ + "runwayml/stable-diffusion-v1-5/v1-5-pruned-emaonly.ckpt", + "WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt", + ] + + for ckpt_path in ckpt_paths: + StableDiffusionPipeline.from_ckpt(ckpt_path, cache_dir=tmpdirname) + + ckpt_names = [os.path.basename(ckpt_path) for ckpt_path in ckpt_paths] + + files = [] + for cache in os.listdir(tmpdirname): + snapshots = os.path.join(tmpdirname, cache, "snapshots") + all_root_files = [t[-1] for t in os.walk(snapshots)] + files += [item for sublist in all_root_files for item in sublist] + + # check that downloaded filenames match checkpoint filenames + assert set(ckpt_names) == set(files) + class CustomPipelineTests(unittest.TestCase): def test_load_custom_pipeline(self): From 82754c64df57a08bae18c8c7ae8d3e393918084f Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 12:37:50 +0000 Subject: [PATCH 02/11] Improve --- .../stable_diffusion/convert_from_ckpt.py | 216 ++++++++++++++++-- 1 file changed, 196 insertions(+), 20 deletions(-) diff --git 
a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py index e1e7d0049ac6..9d2d8a1836e5 100644 --- a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +++ b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py @@ -1387,54 +1387,220 @@ def download_controlnet_from_original_ckpt( class FromCkptMixin: + """ This helper class allows to directly load .ckpt stable diffusion file_extension + into the respective classes. """ + @classmethod - def from_ckpt(cls, model_path_or_checkpoint, **kwargs): + def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): + r""" + Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights saved in the original .ckpt format. + + The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *repo id* of a pretrained pipeline hosted inside a model repo on + https://huggingface.co/ Valid repo ids have to be located under a user or organization name, like + `CompVis/ldm-text2im-large-256`. + - A path to a *directory* containing pipeline weights saved using + [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`. + torch_dtype (`str` or `torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the dtype + will be automatically derived from the model's weights. + custom_pipeline (`str`, *optional*): + + + + This is an experimental feature and is likely to change in the future. + + + + Can be either: + + - A string, the *repo id* of a custom pipeline hosted inside a model repo on + https://huggingface.co/. Valid repo ids have to be located under a user or organization name, + like `hf-internal-testing/diffusers-dummy-pipeline`. 
+ + + + It is required that the model repo has a file, called `pipeline.py` that defines the custom + pipeline. + + + + - A string, the *file name* of a community pipeline hosted on GitHub under + https://github.com/huggingface/diffusers/tree/main/examples/community. Valid file names have to + match exactly the file name without `.py` located under the above link, *e.g.* + `clip_guided_stable_diffusion`. + + + + Community pipelines are always loaded from the current `main` branch of GitHub. + + + + - A path to a *directory* containing a custom pipeline, e.g., `./my_pipeline_directory/`. + + + + It is required that the directory has a file, called `pipeline.py` that defines the custom + pipeline. + + + + For more information on how to load and create custom pipelines, please have a look at [Loading and + Adding Custom + Pipelines](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview) + + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`Union[str, os.PathLike]`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + resume_download (`bool`, *optional*, defaults to `False`): + Whether or not to delete incompletely received files. Will attempt to resume the download if such a + file exists. + proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. 
+ local_files_only(`bool`, *optional*, defaults to `False`): + Whether or not to only look at local files (i.e., do not try to download the model). + use_auth_token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `huggingface-cli login` (stored in `~/.huggingface`). + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + custom_revision (`str`, *optional*, defaults to `"main"` when loading from the Hub and to local version of `diffusers` when loading from GitHub): + The specific model version to use. It can be a branch name, a tag name, or a commit id similar to + `revision` when loading a custom pipeline from the Hub. It can be a diffusers version when loading a + custom pipeline from GitHub. + mirror (`str`, *optional*): + Mirror source to accelerate downloads in China. If you are from China and have an accessibility + problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. + Please refer to the mirror site for more information. specify the folder name here. + device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each + parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the + same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). 
+ low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading by not initializing the weights and only loading the pre-trained weights. This + also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the + model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, + setting this argument to `True` will raise an error. + use_safetensors (`bool`, *optional* ): + If set to `True`, the pipeline will be loaded from `safetensors` weights. If set to `None` (the + default). The pipeline will load using `safetensors` if the safetensors weights are available *and* if + `safetensors` is installed. If the to `False` the pipeline will *not* use `safetensors`. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the + specific pipeline class. The overwritten components are then directly passed to the pipelines + `__init__` method. See example below for more information. + variant (`str`, *optional*): + If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is + ignored when using `from_flax`. + + + + It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated + models](https://huggingface.co/docs/hub/models-gated#gated-models), *e.g.* `"runwayml/stable-diffusion-v1-5"` + + + + + + Activate the special ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use + this method in a firewalled environment. + + + + Examples: + + ```py + >>> from diffusers import DiffusionPipeline + + >>> # Download pipeline from huggingface.co and cache. 
+ >>> pipeline = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") + + >>> # Download pipeline that requires an authorization token + >>> # For more information on access tokens, please refer to this section + >>> # of the documentation](https://huggingface.co/docs/hub/security-tokens) + >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + + >>> # Use a different scheduler + >>> from diffusers import LMSDiscreteScheduler + + >>> scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config) + >>> pipeline.scheduler = scheduler + ``` + """ + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + resume_download = kwargs.pop("resume_download", False) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) + use_auth_token = kwargs.pop("use_auth_token", None) + revision = kwargs.pop("revision", None) + provider = kwargs.pop("provider", None) + sess_options = kwargs.pop("sess_options", None) + extract_ema = kwargs.pop("extract_ema", False) + image_size = kwargs.pop("image_size", 512) + scheduler_type = kwargs.pop("scheduler_type", "pndm") + num_in_channels = kwargs.pop("num_in_channels", None) + upcast_attention = kwargs.pop("upcast_attention", None) + load_safety_checker = kwargs.pop("load_safety_checker", True) + + use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) + pipeline_name = cls.__name__ - file_extension = model_path_or_checkpoint.rsplit(".", 1)[-1] + file_extension = pretrained_model_name_or_path.rsplit(".", 1)[-1] from_safetensors = file_extension == "safetensors" + if from_safetensors and use_safetensors is True: + raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") + + + # TODO: For now we only support stable diffusion stable_unclip = None controlnet = False + if pipeline_name == "StableDiffusionControlNetPipeline": 
model_type = "FrozenCLIPEmbedder" controlnet = True - elif "StableDiffusion" in pipeline_name: model_type = "FrozenCLIPEmbedder" - elif pipeline_name == "StableUnCLIPPipeline": model_type == "FrozenOpenCLIPEmbedder" stable_unclip = "txt2img" - elif pipeline_name == "StableUnCLIPImg2ImgPipeline": model_type == "FrozenOpenCLIPEmbedder" stable_unclip = "img2img" - elif pipeline_name == "PaintByExamplePipeline": model_type == "PaintByExample" - elif pipeline_name == "LDMTextToImagePipeline": model_type == "LDMTextToImage" - else: raise ValueError(f"Unhandled pipeline class: {pipeline_name}") # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained - ckpt_path = Path(model_path_or_checkpoint) + ckpt_path = Path(pretrained_model_name_or_path) if not ckpt_path.is_file(): # get repo_id and (potentially nested) file path of ckpt in repo repo_id = str(Path().joinpath(*ckpt_path.parts[:2])) file_path = str(Path().joinpath(*ckpt_path.parts[2:])) - cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) - resume_download = kwargs.pop("resume_download", False) - local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) - revision = kwargs.pop("revision", None) - proxies = kwargs.pop("proxies", None) - use_auth_token = kwargs.pop("use_auth_token", None) - - model_path_or_checkpoint = hf_hub_download( + pretrained_model_name_or_path = hf_hub_download( repo_id, filename=file_path, cache_dir=cache_dir, @@ -1443,14 +1609,24 @@ def from_ckpt(cls, model_path_or_checkpoint, **kwargs): local_files_only=local_files_only, use_auth_token=use_auth_token, revision=revision, + force_download=force_download, + provider=provider, + sess_options=sess_options, ) - return download_from_original_stable_diffusion_ckpt( - model_path_or_checkpoint, + torch_dtype = kwargs.pop("torch_dtype", None) + + pipe = download_from_original_stable_diffusion_ckpt( + pretrained_model_name_or_path, pipeline_class=cls, model_type=model_type, stable_unclip=stable_unclip, 
controlnet=controlnet, from_safetensors=from_safetensors, - **kwargs, + extract_ema=extract_ema, + image_size=image_size, + scheduler_type=scheduler_type, + num_in_channels=num_in_channels, + upcast_attention=upcast_attention, + load_safety_checker=load_safety_checker, ) From de3cd0d99eaba343f657f2b7bf9c39b70091471a Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 12:38:10 +0000 Subject: [PATCH 03/11] make style --- .../stable_diffusion/convert_from_ckpt.py | 33 ++++++++----------- .../pipeline_stable_diffusion.py | 2 +- .../pipeline_stable_diffusion_img2img.py | 2 +- ...ipeline_stable_diffusion_inpaint_legacy.py | 2 +- 4 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py index 9d2d8a1836e5..eced8db2b4c4 100644 --- a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +++ b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py @@ -16,11 +16,12 @@ import re from io import BytesIO -from typing import Optional from pathlib import Path +from typing import Optional import requests import torch +from huggingface_hub import hf_hub_download from transformers import ( AutoFeatureExtractor, BertTokenizerFast, @@ -32,15 +33,12 @@ CLIPVisionModelWithProjection, ) -from huggingface_hub import hf_hub_download - from ...models import ( AutoencoderKL, - UNet2DConditionModel, - PriorTransformer, ControlNetModel, + PriorTransformer, + UNet2DConditionModel, ) - from ...schedulers import ( DDIMScheduler, DDPMScheduler, @@ -52,18 +50,14 @@ PNDMScheduler, UnCLIPScheduler, ) - +from ...utils import DIFFUSERS_CACHE, HF_HUB_OFFLINE, is_omegaconf_available, is_safetensors_available, logging +from ...utils.import_utils import BACKENDS_MAPPING from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel from ..paint_by_example import PaintByExampleImageEncoder +from ..pipeline_utils import 
DiffusionPipeline from .safety_checker import StableDiffusionSafetyChecker - from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer -from ..pipeline_utils import DiffusionPipeline - -from ...utils import is_omegaconf_available, is_safetensors_available, logging, DIFFUSERS_CACHE, HF_HUB_OFFLINE -from ...utils.import_utils import BACKENDS_MAPPING - logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -1044,12 +1038,12 @@ def download_from_original_stable_diffusion_ckpt( # import pipelines here to avoid circular import error when using from_ckpt method from diffusers import ( + LDMTextToImagePipeline, + PaintByExamplePipeline, StableDiffusionControlNetPipeline, StableDiffusionPipeline, StableUnCLIPImg2ImgPipeline, StableUnCLIPPipeline, - LDMTextToImagePipeline, - PaintByExamplePipeline, ) if pipeline_class is None: @@ -1387,8 +1381,8 @@ def download_controlnet_from_original_ckpt( class FromCkptMixin: - """ This helper class allows to directly load .ckpt stable diffusion file_extension - into the respective classes. 
""" + """This helper class allows to directly load .ckpt stable diffusion file_extension + into the respective classes.""" @classmethod def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): @@ -1570,7 +1564,6 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): if from_safetensors and use_safetensors is True: raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") - # TODO: For now we only support stable diffusion stable_unclip = None controlnet = False @@ -1614,9 +1607,9 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): sess_options=sess_options, ) - torch_dtype = kwargs.pop("torch_dtype", None) + kwargs.pop("torch_dtype", None) - pipe = download_from_original_stable_diffusion_ckpt( + download_from_original_stable_diffusion_ckpt( pretrained_model_name_or_path, pipeline_class=cls, model_type=model_type, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index ff5afcfaf008..be52f25f691a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -32,9 +32,9 @@ ) from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput +from .convert_from_ckpt import FromCkptMixin from .safety_checker import StableDiffusionSafetyChecker -from .convert_from_ckpt import FromCkptMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index e6043780a742..ced999330b93 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -36,9 +36,9 @@ ) from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput +from .convert_from_ckpt import FromCkptMixin from .safety_checker import StableDiffusionSafetyChecker -from .convert_from_ckpt import FromCkptMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 5db05257f5b6..dc9e3ed4aa49 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -34,9 +34,9 @@ ) from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput +from .convert_from_ckpt import FromCkptMixin from .safety_checker import StableDiffusionSafetyChecker -from .convert_from_ckpt import FromCkptMixin logger = logging.get_logger(__name__) From f8f607f6a5deb7fbc61ea143a8f1a04a8e1dceb3 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 12:59:23 +0000 Subject: [PATCH 04/11] merge main into --- .../stable_diffusion/convert_from_ckpt.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py index 043c02195918..a42c131842ef 100644 --- a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +++ b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py @@ -982,7 +982,6 @@ def download_from_original_stable_diffusion_ckpt( image_size: int = 512, prediction_type: str = None, model_type: str = None, - is_img2img: bool = False, extract_ema: bool = False, scheduler_type: str = "pndm", num_in_channels: Optional[int] = None, @@ -1053,9 +1052,6 @@ def download_from_original_stable_diffusion_ckpt( PaintByExamplePipeline, ) - if pipeline_class is None: - pipeline_class = StableDiffusionPipeline - if pipeline_class is None: pipeline_class = StableDiffusionPipeline @@ -1564,6 +1560,8 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): upcast_attention = kwargs.pop("upcast_attention", None) load_safety_checker = kwargs.pop("load_safety_checker", True) + torch_dtype = kwargs.pop("torch_dtype", None) + use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) pipeline_name = cls.__name__ @@ -1617,19 +1615,22 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): sess_options=sess_options, ) - kwargs.pop("torch_dtype", None) - - download_from_original_stable_diffusion_ckpt( - pretrained_model_name_or_path, - pipeline_class=cls, - model_type=model_type, - 
stable_unclip=stable_unclip, - controlnet=controlnet, - from_safetensors=from_safetensors, - extract_ema=extract_ema, - image_size=image_size, - scheduler_type=scheduler_type, - num_in_channels=num_in_channels, - upcast_attention=upcast_attention, - load_safety_checker=load_safety_checker, + pipe = download_from_original_stable_diffusion_ckpt( + pretrained_model_name_or_path, + pipeline_class=cls, + model_type=model_type, + stable_unclip=stable_unclip, + controlnet=controlnet, + from_safetensors=from_safetensors, + extract_ema=extract_ema, + image_size=image_size, + scheduler_type=scheduler_type, + num_in_channels=num_in_channels, + upcast_attention=upcast_attention, + load_safety_checker=load_safety_checker, ) + + if torch_dtype is not None: + pipe.to(torch_dtype) + + return pipe From 61dbeeed1afc8c4a354d07551fe54e28ea15d070 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 13:59:22 +0000 Subject: [PATCH 05/11] Improve more --- docs/source/en/api/loaders.mdx | 4 + src/diffusers/loaders.py | 191 +++++++++++++ .../stable_diffusion/convert_from_ckpt.py | 266 +----------------- .../pipeline_stable_diffusion.py | 3 +- .../pipeline_stable_diffusion_img2img.py | 3 +- ...ipeline_stable_diffusion_inpaint_legacy.py | 3 +- .../stable_diffusion/test_stable_diffusion.py | 57 ++++ tests/pipelines/test_pipelines.py | 22 +- 8 files changed, 261 insertions(+), 288 deletions(-) diff --git a/docs/source/en/api/loaders.mdx b/docs/source/en/api/loaders.mdx index 8cbf21b8e0cf..20134a0afe66 100644 --- a/docs/source/en/api/loaders.mdx +++ b/docs/source/en/api/loaders.mdx @@ -36,3 +36,7 @@ API to load such adapter neural networks via the [`loaders.py` module](https://g ### LoraLoaderMixin [[autodoc]] loaders.LoraLoaderMixin + +### FromCkptMixin + +[[autodoc]] loaders.FromCkptMixin diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index e814981a85c9..790ebf49815f 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -13,9 +13,11 @@ # 
limitations under the License. import os from collections import defaultdict +from pathlib import Path from typing import Callable, Dict, List, Optional, Union import torch +from huggingface_hub import hf_hub_download from .models.attention_processor import LoRAAttnProcessor from .utils import ( @@ -1051,3 +1053,192 @@ def save_function(weights, filename): save_function(state_dict, os.path.join(save_directory, weight_name)) logger.info(f"Model weights saved in {os.path.join(save_directory, weight_name)}") + + +class FromCkptMixin: + """This helper class allows to directly load .ckpt stable diffusion file_extension + into the respective classes.""" + + @classmethod + def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): + r""" + Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights saved in the original .ckpt format. + + The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + - A link to the .ckpt file on the Hub. Should be in the format + `"https://huggingface.co//blob/main/"` + - A path to a *file* containing all pipeline weights. + torch_dtype (`str` or `torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the dtype + will be automatically derived from the model's weights. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`Union[str, os.PathLike]`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + resume_download (`bool`, *optional*, defaults to `False`): + Whether or not to delete incompletely received files. 
Will attempt to resume the download if such a + file exists. + proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether or not to only look at local files (i.e., do not try to download the model). + use_auth_token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `huggingface-cli login` (stored in `~/.huggingface`). + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + use_safetensors (`bool`, *optional* ): + If set to `True`, the pipeline will be loaded from `safetensors` weights. If set to `None` (the + default). The pipeline will load using `safetensors` if the safetensors weights are available *and* if + `safetensors` is installed. If the to `False` the pipeline will *not* use `safetensors`. + extract_ema (`bool`, *optional*, defaults to `False`): Only relevant for + checkpoints that have both EMA and non-EMA weights. Whether to extract the EMA weights or not. Defaults + to `False`. Pass `True` to extract the EMA weights. EMA weights usually yield higher quality images for + inference. Non-EMA weights are usually better to continue fine-tuning. + upcast_attention (`bool`, *optional*, defaults to `None`): + Whether the attention computation should always be upcasted. This is necessary when running stable + image_size (`int`, *optional*, defaults to 512): + The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2 + Base. Use 768 for Stable Diffusion v2. 
+ prediction_type (`str`, *optional*): + The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion v1.X and Stable + Diffusion v2 Base. Use `'v_prediction'` for Stable Diffusion v2. + num_in_channels (`int`, *optional*, defaults to None): + The number of input channels. If `None`, it will be automatically inferred. + scheduler_type (`str`, *optional*, defaults to 'pndm'): + Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler", "euler-ancestral", "dpm", + "ddim"]`. + load_safety_checker (`bool`, *optional*, defaults to `True`): + Whether to load the safety checker or not. Defaults to `True`. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the + specific pipeline class. The overwritten components are then directly passed to the pipelines + `__init__` method. See example below for more information. + + Examples: + + ```py + >>> from diffusers import StableDiffusionPipeline + + >>> # Download pipeline from huggingface.co and cache. 
+ >>> pipeline = StableDiffusionPipeline.from_ckpt("https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors") + + >>> # Download pipeline from local file + >>> # file is downloaded under ./v1-5-pruned-emaonly.ckpt + >>> pipeline = StableDiffusionPipeline.from_ckpt("./v1-5-pruned-emaonly") + + >>> # Enable float16 and move to GPU + >>> pipeline = StableDiffusionPipeline.from_ckpt("https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt, torch_dtype=torch.float16) + >>> pipeline.to("cuda") + ``` + """ + # import here to avoid circular dependency + from .pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt + + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + resume_download = kwargs.pop("resume_download", False) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) + use_auth_token = kwargs.pop("use_auth_token", None) + revision = kwargs.pop("revision", None) + extract_ema = kwargs.pop("extract_ema", False) + image_size = kwargs.pop("image_size", 512) + scheduler_type = kwargs.pop("scheduler_type", "pndm") + num_in_channels = kwargs.pop("num_in_channels", None) + upcast_attention = kwargs.pop("upcast_attention", None) + load_safety_checker = kwargs.pop("load_safety_checker", True) + prediction_type = kwargs.pop("prediction_type", None) + + torch_dtype = kwargs.pop("torch_dtype", None) + + use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) + + pipeline_name = cls.__name__ + file_extension = pretrained_model_name_or_path.rsplit(".", 1)[-1] + from_safetensors = file_extension == "safetensors" + + if from_safetensors and use_safetensors is True: + raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") + + # TODO: For now we only support stable diffusion + 
stable_unclip = None + controlnet = False + + if pipeline_name == "StableDiffusionControlNetPipeline": + model_type = "FrozenCLIPEmbedder" + controlnet = True + elif "StableDiffusion" in pipeline_name: + model_type = "FrozenCLIPEmbedder" + elif pipeline_name == "StableUnCLIPPipeline": + model_type == "FrozenOpenCLIPEmbedder" + stable_unclip = "txt2img" + elif pipeline_name == "StableUnCLIPImg2ImgPipeline": + model_type == "FrozenOpenCLIPEmbedder" + stable_unclip = "img2img" + elif pipeline_name == "PaintByExamplePipeline": + model_type == "PaintByExample" + elif pipeline_name == "LDMTextToImagePipeline": + model_type == "LDMTextToImage" + else: + raise ValueError(f"Unhandled pipeline class: {pipeline_name}") + + # remove huggingface url + for prefix in ["https://huggingface.co/", "huggingface.co/", "hf.co/", "https://hf.co/"]: + if pretrained_model_name_or_path.startswith(prefix): + pretrained_model_name_or_path = pretrained_model_name_or_path[len(prefix) :] + + # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained + ckpt_path = Path(pretrained_model_name_or_path) + if not ckpt_path.is_file(): + # get repo_id and (potentially nested) file path of ckpt in repo + repo_id = str(Path().joinpath(*ckpt_path.parts[:2])) + file_path = str(Path().joinpath(*ckpt_path.parts[2:])) + + if file_path.startswith("blob/"): + file_path = file_path[len("blob/") :] + + if file_path.startswith("main/"): + file_path = file_path[len("main/") :] + + pretrained_model_name_or_path = hf_hub_download( + repo_id, + filename=file_path, + cache_dir=cache_dir, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + force_download=force_download, + ) + + pipe = download_from_original_stable_diffusion_ckpt( + pretrained_model_name_or_path, + pipeline_class=cls, + model_type=model_type, + stable_unclip=stable_unclip, + controlnet=controlnet, + from_safetensors=from_safetensors, + 
extract_ema=extract_ema, + image_size=image_size, + scheduler_type=scheduler_type, + num_in_channels=num_in_channels, + upcast_attention=upcast_attention, + load_safety_checker=load_safety_checker, + prediction_type=prediction_type, + ) + + if torch_dtype is not None: + pipe.to(torch_dtype=torch_dtype) + + return pipe diff --git a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py index a42c131842ef..5961636dd197 100644 --- a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +++ b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py @@ -16,12 +16,10 @@ import re from io import BytesIO -from pathlib import Path from typing import Optional import requests import torch -from huggingface_hub import hf_hub_download from transformers import ( AutoFeatureExtractor, BertTokenizerFast, @@ -49,19 +47,15 @@ LMSDiscreteScheduler, PNDMScheduler, UnCLIPScheduler, - StableDiffusionControlNetPipeline, ) +from ...utils import is_omegaconf_available, is_safetensors_available, logging +from ...utils.import_utils import BACKENDS_MAPPING from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel from ..paint_by_example import PaintByExampleImageEncoder +from ..pipeline_utils import DiffusionPipeline from .safety_checker import StableDiffusionSafetyChecker - from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer -from ..pipeline_utils import DiffusionPipeline - -from ...utils import is_omegaconf_available, is_safetensors_available, logging, DIFFUSERS_CACHE, HF_HUB_OFFLINE -from ...utils.import_utils import BACKENDS_MAPPING - logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -1044,12 +1038,12 @@ def download_from_original_stable_diffusion_ckpt( # import pipelines here to avoid circular import error when using from_ckpt method from diffusers import ( + LDMTextToImagePipeline, + PaintByExamplePipeline, 
StableDiffusionControlNetPipeline, StableDiffusionPipeline, StableUnCLIPImg2ImgPipeline, StableUnCLIPPipeline, - LDMTextToImagePipeline, - PaintByExamplePipeline, ) if pipeline_class is None: @@ -1384,253 +1378,3 @@ def download_controlnet_from_original_ckpt( ) return controlnet_model - - -class FromCkptMixin: - """This helper class allows to directly load .ckpt stable diffusion file_extension - into the respective classes.""" - - @classmethod - def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): - r""" - Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights saved in the original .ckpt format. - - The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). - - Parameters: - pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): - Can be either: - - - A string, the *repo id* of a pretrained pipeline hosted inside a model repo on - https://huggingface.co/ Valid repo ids have to be located under a user or organization name, like - `CompVis/ldm-text2im-large-256`. - - A path to a *directory* containing pipeline weights saved using - [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`. - torch_dtype (`str` or `torch.dtype`, *optional*): - Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the dtype - will be automatically derived from the model's weights. - custom_pipeline (`str`, *optional*): - - - - This is an experimental feature and is likely to change in the future. - - - - Can be either: - - - A string, the *repo id* of a custom pipeline hosted inside a model repo on - https://huggingface.co/. Valid repo ids have to be located under a user or organization name, - like `hf-internal-testing/diffusers-dummy-pipeline`. - - - - It is required that the model repo has a file, called `pipeline.py` that defines the custom - pipeline. 
- - - - - A string, the *file name* of a community pipeline hosted on GitHub under - https://github.com/huggingface/diffusers/tree/main/examples/community. Valid file names have to - match exactly the file name without `.py` located under the above link, *e.g.* - `clip_guided_stable_diffusion`. - - - - Community pipelines are always loaded from the current `main` branch of GitHub. - - - - - A path to a *directory* containing a custom pipeline, e.g., `./my_pipeline_directory/`. - - - - It is required that the directory has a file, called `pipeline.py` that defines the custom - pipeline. - - - - For more information on how to load and create custom pipelines, please have a look at [Loading and - Adding Custom - Pipelines](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview) - - force_download (`bool`, *optional*, defaults to `False`): - Whether or not to force the (re-)download of the model weights and configuration files, overriding the - cached versions if they exist. - cache_dir (`Union[str, os.PathLike]`, *optional*): - Path to a directory in which a downloaded pretrained model configuration should be cached if the - standard cache should not be used. - resume_download (`bool`, *optional*, defaults to `False`): - Whether or not to delete incompletely received files. Will attempt to resume the download if such a - file exists. - proxies (`Dict[str, str]`, *optional*): - A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', - 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. - output_loading_info(`bool`, *optional*, defaults to `False`): - Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. - local_files_only(`bool`, *optional*, defaults to `False`): - Whether or not to only look at local files (i.e., do not try to download the model). 
- use_auth_token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `huggingface-cli login` (stored in `~/.huggingface`). - revision (`str`, *optional*, defaults to `"main"`): - The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a - git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any - identifier allowed by git. - custom_revision (`str`, *optional*, defaults to `"main"` when loading from the Hub and to local version of `diffusers` when loading from GitHub): - The specific model version to use. It can be a branch name, a tag name, or a commit id similar to - `revision` when loading a custom pipeline from the Hub. It can be a diffusers version when loading a - custom pipeline from GitHub. - mirror (`str`, *optional*): - Mirror source to accelerate downloads in China. If you are from China and have an accessibility - problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. - Please refer to the mirror site for more information. specify the folder name here. - device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*): - A map that specifies where each submodule should go. It doesn't need to be refined to each - parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the - same device. - - To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For - more information about each option see [designing a device - map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). - low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): - Speed up model loading by not initializing the weights and only loading the pre-trained weights. 
This - also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the - model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, - setting this argument to `True` will raise an error. - use_safetensors (`bool`, *optional* ): - If set to `True`, the pipeline will be loaded from `safetensors` weights. If set to `None` (the - default). The pipeline will load using `safetensors` if the safetensors weights are available *and* if - `safetensors` is installed. If the to `False` the pipeline will *not* use `safetensors`. - kwargs (remaining dictionary of keyword arguments, *optional*): - Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the - specific pipeline class. The overwritten components are then directly passed to the pipelines - `__init__` method. See example below for more information. - variant (`str`, *optional*): - If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is - ignored when using `from_flax`. - - - - It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated - models](https://huggingface.co/docs/hub/models-gated#gated-models), *e.g.* `"runwayml/stable-diffusion-v1-5"` - - - - - - Activate the special ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use - this method in a firewalled environment. - - - - Examples: - - ```py - >>> from diffusers import DiffusionPipeline - - >>> # Download pipeline from huggingface.co and cache. 
- >>> pipeline = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") - - >>> # Download pipeline that requires an authorization token - >>> # For more information on access tokens, please refer to this section - >>> # of the documentation](https://huggingface.co/docs/hub/security-tokens) - >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") - - >>> # Use a different scheduler - >>> from diffusers import LMSDiscreteScheduler - - >>> scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config) - >>> pipeline.scheduler = scheduler - ``` - """ - cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) - resume_download = kwargs.pop("resume_download", False) - force_download = kwargs.pop("force_download", False) - proxies = kwargs.pop("proxies", None) - local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) - use_auth_token = kwargs.pop("use_auth_token", None) - revision = kwargs.pop("revision", None) - provider = kwargs.pop("provider", None) - sess_options = kwargs.pop("sess_options", None) - extract_ema = kwargs.pop("extract_ema", False) - image_size = kwargs.pop("image_size", 512) - scheduler_type = kwargs.pop("scheduler_type", "pndm") - num_in_channels = kwargs.pop("num_in_channels", None) - upcast_attention = kwargs.pop("upcast_attention", None) - load_safety_checker = kwargs.pop("load_safety_checker", True) - - torch_dtype = kwargs.pop("torch_dtype", None) - - use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) - - pipeline_name = cls.__name__ - - file_extension = pretrained_model_name_or_path.rsplit(".", 1)[-1] - from_safetensors = file_extension == "safetensors" - - if from_safetensors and use_safetensors is True: - raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") - - # TODO: For now we only support stable diffusion - stable_unclip = None - controlnet = False - - if pipeline_name == "StableDiffusionControlNetPipeline": 
- model_type = "FrozenCLIPEmbedder" - controlnet = True - elif "StableDiffusion" in pipeline_name: - model_type = "FrozenCLIPEmbedder" - elif pipeline_name == "StableUnCLIPPipeline": - model_type == "FrozenOpenCLIPEmbedder" - stable_unclip = "txt2img" - elif pipeline_name == "StableUnCLIPImg2ImgPipeline": - model_type == "FrozenOpenCLIPEmbedder" - stable_unclip = "img2img" - elif pipeline_name == "PaintByExamplePipeline": - model_type == "PaintByExample" - elif pipeline_name == "LDMTextToImagePipeline": - model_type == "LDMTextToImage" - else: - raise ValueError(f"Unhandled pipeline class: {pipeline_name}") - - # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained - ckpt_path = Path(pretrained_model_name_or_path) - if not ckpt_path.is_file(): - # get repo_id and (potentially nested) file path of ckpt in repo - repo_id = str(Path().joinpath(*ckpt_path.parts[:2])) - file_path = str(Path().joinpath(*ckpt_path.parts[2:])) - - pretrained_model_name_or_path = hf_hub_download( - repo_id, - filename=file_path, - cache_dir=cache_dir, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - revision=revision, - force_download=force_download, - provider=provider, - sess_options=sess_options, - ) - - pipe = download_from_original_stable_diffusion_ckpt( - pretrained_model_name_or_path, - pipeline_class=cls, - model_type=model_type, - stable_unclip=stable_unclip, - controlnet=controlnet, - from_safetensors=from_safetensors, - extract_ema=extract_ema, - image_size=image_size, - scheduler_type=scheduler_type, - num_in_channels=num_in_channels, - upcast_attention=upcast_attention, - load_safety_checker=load_safety_checker, - ) - - if torch_dtype is not None: - pipe.to(torch_dtype) - - return pipe diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index e7a9a71e5dea..c0efe13c3cad 
100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -20,7 +20,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin +from ...loaders import FromCkptMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -33,7 +33,6 @@ ) from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput -from .convert_from_ckpt import FromCkptMixin from .safety_checker import StableDiffusionSafetyChecker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 511b1d9fe1ab..b15419b8d177 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -23,7 +23,7 @@ from ...configuration_utils import FrozenDict from ...image_processor import VaeImageProcessor -from ...loaders import TextualInversionLoaderMixin +from ...loaders import FromCkptMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -37,7 +37,6 @@ ) from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput -from .convert_from_ckpt import FromCkptMixin from .safety_checker import StableDiffusionSafetyChecker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 21ab98bb2427..53aefd6b8bdb 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -22,7 +22,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...loaders import TextualInversionLoaderMixin +from ...loaders import FromCkptMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -35,7 +35,6 @@ ) from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput -from .convert_from_ckpt import FromCkptMixin from .safety_checker import StableDiffusionSafetyChecker diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 14421a64b9e8..fcfcd84c5d48 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -36,6 +36,7 @@ UNet2DConditionModel, logging, ) +from diffusers.models.attention_processor import AttnProcessor from diffusers.utils import load_numpy, nightly, slow, torch_device from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu @@ -865,6 +866,62 @@ def test_stable_diffusion_textual_inversion(self): assert max_diff < 5e-2 +@slow +@require_torch_gpu +class StableDiffusionPipelineCkptTests(unittest.TestCase): + def tearDown(self): + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + def test_download_from_hub(self): + ckpt_paths = [ + "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt", + "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt", + ] + + for ckpt_path in ckpt_paths: + pipe = StableDiffusionPipeline.from_ckpt(ckpt_path, torch_dtype=torch.float16) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to("cuda") + + image_out = pipe("test", num_inference_steps=1, output_type="np").images[0] + + assert image_out.shape == (512, 512, 3) + + def test_download_local(self): + filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt") + + pipe = StableDiffusionPipeline.from_ckpt(filename, torch_dtype=torch.float16) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to("cuda") + + image_out = pipe("test", num_inference_steps=1, output_type="np").images[0] + + assert image_out.shape == (512, 512, 3) + + def 
test_download_ckpt_diff_format_is_same(self): + ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt" + + pipe = StableDiffusionPipeline.from_ckpt(ckpt_path) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.unet.set_attn_processor(AttnProcessor()) + pipe.to("cuda") + + generator = torch.Generator(device="cpu").manual_seed(0) + image_ckpt = pipe("a turtle", num_inference_steps=5, generator=generator, output_type="np").images[0] + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.unet.set_attn_processor(AttnProcessor()) + pipe.to("cuda") + + generator = torch.Generator(device="cpu").manual_seed(0) + image = pipe("a turtle", num_inference_steps=5, generator=generator, output_type="np").images[0] + + assert np.max(np.abs(image - image_ckpt)) < 1e-4 + + @nightly @require_torch_gpu class StableDiffusionPipelineNightlyTests(unittest.TestCase): diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 79d10e5da299..a5d70b01d453 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -457,27 +457,6 @@ def test_download_broken_variant(self): diffusers.utils.import_utils._safetensors_available = True - def test_download_from_ckpt(self): - with tempfile.TemporaryDirectory() as tmpdirname: - ckpt_paths = [ - "runwayml/stable-diffusion-v1-5/v1-5-pruned-emaonly.ckpt", - "WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt", - ] - - for ckpt_path in ckpt_paths: - StableDiffusionPipeline.from_ckpt(ckpt_path, cache_dir=tmpdirname) - - ckpt_names = [os.path.basename(ckpt_path) for ckpt_path in ckpt_paths] - - files = [] - for cache in os.listdir(tmpdirname): - snapshots = os.path.join(tmpdirname, cache, "snapshots") - all_root_files = [t[-1] for t in os.walk(snapshots)] - files += [item for sublist in 
all_root_files for item in sublist] - - # check that downloaded filenames match checkpoint filenames - assert set(ckpt_names) == set(files) - def test_local_save_load_index(self): prompt = "hello" for variant in [None, "fp16"]: @@ -596,6 +575,7 @@ def test_text_inversion_download(self): out = pipe(prompt, num_inference_steps=1, output_type="numpy").images assert out.shape == (1, 128, 128, 3) + class CustomPipelineTests(unittest.TestCase): def test_load_custom_pipeline(self): pipeline = DiffusionPipeline.from_pretrained( From 0f633ceb9822596d9afee1a7b640c19fa29bea05 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 16:31:26 +0200 Subject: [PATCH 06/11] fix more --- @ | 230 ++++++++++++++++++ .../pipelines/stable_diffusion/text2img.mdx | 2 + src/diffusers/__init__.py | 1 - src/diffusers/loaders.py | 13 +- .../pipeline_stable_diffusion.py | 8 + .../pipeline_stable_diffusion_controlnet.py | 3 + .../pipeline_stable_diffusion_img2img.py | 12 +- .../pipeline_stable_diffusion_inpaint.py | 11 +- ...ipeline_stable_diffusion_inpaint_legacy.py | 14 +- 9 files changed, 284 insertions(+), 10 deletions(-) create mode 100644 @ diff --git a/@ b/@ new file mode 100644 index 000000000000..40029fcecfd1 --- /dev/null +++ b/@ @@ -0,0 +1,230 @@ +__version__ = "0.16.0.dev0" + +from .configuration_utils import ConfigMixin +from .utils import ( + OptionalDependencyNotAvailable, + is_flax_available, + is_inflect_available, + is_k_diffusion_available, + is_k_diffusion_version, + is_librosa_available, + is_note_seq_available, + is_onnx_available, + is_scipy_available, + is_torch_available, + is_transformers_available, + is_transformers_version, + is_unidecode_available, + logging, +) + + +try: + if not is_onnx_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_onnx_objects import * # noqa F403 +else: + from .pipelines import OnnxRuntimeModel + +try: + if not is_torch_available(): + raise 
OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_pt_objects import * # noqa F403 +else: + from .models import ( + AutoencoderKL, + ControlNetModel, + ModelMixin, + PriorTransformer, + T5FilmDecoder, + Transformer2DModel, + UNet1DModel, + UNet2DConditionModel, + UNet2DModel, + UNet3DConditionModel, + VQModel, + ) + from .optimization import ( + get_constant_schedule, + get_constant_schedule_with_warmup, + get_cosine_schedule_with_warmup, + get_cosine_with_hard_restarts_schedule_with_warmup, + get_linear_schedule_with_warmup, + get_polynomial_decay_schedule_with_warmup, + get_scheduler, + ) + from .pipelines import ( + AudioPipelineOutput, + DanceDiffusionPipeline, + DDIMPipeline, + DDPMPipeline, + DiffusionPipeline, + DiTPipeline, + ImagePipelineOutput, + KarrasVePipeline, + LDMPipeline, + LDMSuperResolutionPipeline, + PNDMPipeline, + RePaintPipeline, + ScoreSdeVePipeline, + ) + from .schedulers import ( + DDIMInverseScheduler, + DDIMScheduler, + DDPMScheduler, + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + IPNDMScheduler, + KarrasVeScheduler, + KDPM2AncestralDiscreteScheduler, + KDPM2DiscreteScheduler, + PNDMScheduler, + RePaintScheduler, + SchedulerMixin, + ScoreSdeVeScheduler, + UnCLIPScheduler, + UniPCMultistepScheduler, + VQDiffusionScheduler, + ) + from .training_utils import EMAModel + +try: + if not (is_torch_available() and is_scipy_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_torch_and_scipy_objects import * # noqa F403 +else: + from .schedulers import LMSDiscreteScheduler + + +try: + if not (is_torch_available() and is_transformers_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_torch_and_transformers_objects import * # noqa F403 +else: + from 
.pipelines import ( + AltDiffusionImg2ImgPipeline, + AltDiffusionPipeline, + AudioLDMPipeline, + CycleDiffusionPipeline, + LDMTextToImagePipeline, + PaintByExamplePipeline, + SemanticStableDiffusionPipeline, + StableDiffusionAttendAndExcitePipeline, + StableDiffusionControlNetPipeline, + StableDiffusionDepth2ImgPipeline, + StableDiffusionImageVariationPipeline, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, + StableDiffusionInpaintPipelineLegacy, + StableDiffusionInstructPix2PixPipeline, + StableDiffusionLatentUpscalePipeline, + StableDiffusionModelEditingPipeline, + StableDiffusionPanoramaPipeline, + StableDiffusionPipeline, + StableDiffusionPipelineSafe, + StableDiffusionPix2PixZeroPipeline, + StableDiffusionSAGPipeline, + StableDiffusionUpscalePipeline, + StableUnCLIPImg2ImgPipeline, + StableUnCLIPPipeline, + TextToVideoSDPipeline, + TextToVideoZeroPipeline, + UnCLIPImageVariationPipeline, + UnCLIPPipeline, + VersatileDiffusionDualGuidedPipeline, + VersatileDiffusionImageVariationPipeline, + VersatileDiffusionPipeline, + VersatileDiffusionTextToImagePipeline, + VQDiffusionPipeline, + ) + +try: + if not (is_torch_available() and is_transformers_available() and is_k_diffusion_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_torch_and_transformers_and_k_diffusion_objects import * # noqa F403 +else: + from .pipelines import StableDiffusionKDiffusionPipeline + +try: + if not (is_torch_available() and is_transformers_available() and is_onnx_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_torch_and_transformers_and_onnx_objects import * # noqa F403 +else: + from .pipelines import ( + OnnxStableDiffusionImg2ImgPipeline, + OnnxStableDiffusionInpaintPipeline, + OnnxStableDiffusionInpaintPipelineLegacy, + OnnxStableDiffusionPipeline, + OnnxStableDiffusionUpscalePipeline, + StableDiffusionOnnxPipeline, + ) + +try: + if not 
(is_torch_available() and is_librosa_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_torch_and_librosa_objects import * # noqa F403 +else: + from .pipelines import AudioDiffusionPipeline, Mel + +try: + if not (is_transformers_available() and is_torch_available() and is_note_seq_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_transformers_and_torch_and_note_seq_objects import * # noqa F403 +else: + from .pipelines import SpectrogramDiffusionPipeline + +try: + if not is_flax_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_flax_objects import * # noqa F403 +else: + from .models.controlnet_flax import FlaxControlNetModel + from .models.modeling_flax_utils import FlaxModelMixin + from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel + from .models.vae_flax import FlaxAutoencoderKL + from .pipelines import FlaxDiffusionPipeline + from .schedulers import ( + FlaxDDIMScheduler, + FlaxDDPMScheduler, + FlaxDPMSolverMultistepScheduler, + FlaxKarrasVeScheduler, + FlaxLMSDiscreteScheduler, + FlaxPNDMScheduler, + FlaxSchedulerMixin, + FlaxScoreSdeVeScheduler, + ) + + +try: + if not (is_flax_available() and is_transformers_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_flax_and_transformers_objects import * # noqa F403 +else: + from .pipelines import ( + FlaxStableDiffusionControlNetPipeline, + FlaxStableDiffusionImg2ImgPipeline, + FlaxStableDiffusionInpaintPipeline, + FlaxStableDiffusionPipeline, + ) + +try: + if not (is_note_seq_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from .utils.dummy_note_seq_objects import * # noqa F403 +else: + from .pipelines import MidiProcessor diff --git a/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx 
b/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx index 6b8d53bf6510..0478e516eaf1 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx @@ -39,6 +39,8 @@ Available Checkpoints are: - disable_xformers_memory_efficient_attention - enable_vae_tiling - disable_vae_tiling + - from_ckpt + - load_attn_procs [[autodoc]] FlaxStableDiffusionPipeline - all diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 07c17100e0e0..40029fcecfd1 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -109,7 +109,6 @@ except OptionalDependencyNotAvailable: from .utils.dummy_torch_and_transformers_objects import * # noqa F403 else: - from .loaders import TextualInversionLoaderMixin from .pipelines import ( AltDiffusionImg2ImgPipeline, AltDiffusionPipeline, diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 790ebf49815f..b8c8e483aaac 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -433,6 +433,7 @@ def load_textual_inversion( Example: To load a textual inversion embedding vector in `diffusers` format: + ```py from diffusers import StableDiffusionPipeline import torch @@ -465,6 +466,7 @@ def load_textual_inversion( image = pipe(prompt, num_inference_steps=50).images[0] image.save("character.png") ``` + """ if not hasattr(self, "tokenizer") or not isinstance(self.tokenizer, PreTrainedTokenizer): raise ValueError( @@ -1130,14 +1132,19 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): >>> from diffusers import StableDiffusionPipeline >>> # Download pipeline from huggingface.co and cache. - >>> pipeline = StableDiffusionPipeline.from_ckpt("https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors") + >>> pipeline = StableDiffusionPipeline.from_ckpt( + ... 
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors" + ... ) >>> # Download pipeline from local file >>> # file is downloaded under ./v1-5-pruned-emaonly.ckpt - >>> pipeline = StableDiffusionPipeline.from_ckpt("./v1-5-pruned-emaonly") + >>> pipeline = StableDiffusionPipeline.from_ckpt("./v1-5-pruned-emaonly") >>> # Enable float16 and move to GPU - >>> pipeline = StableDiffusionPipeline.from_ckpt("https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt, torch_dtype=torch.float16) + >>> pipeline = StableDiffusionPipeline.from_ckpt( + ... "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt", + ... torch_dtype=torch.float16, + ... ) >>> pipeline.to("cuda") ``` """ diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index c0efe13c3cad..7347d70c4023 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -60,6 +60,14 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py index 3b8889d92b55..322f2232fc8a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_controlnet.py @@ -156,6 +156,9 @@ class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoade This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index b15419b8d177..c26ddf06cadc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -23,7 +23,7 @@ from ...configuration_utils import FrozenDict from ...image_processor import VaeImageProcessor -from ...loaders import FromCkptMixin, TextualInversionLoaderMixin +from ...loaders import FromCkptMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -92,13 +92,21 @@ def preprocess(image): return image -class StableDiffusionImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, FromCkptMixin): +class StableDiffusionImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromCkptMixin): r""" Pipeline for text-guided image to image generation using Stable Diffusion. This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 8e0ea5a8d079..fb2e5dc424e3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -22,7 +22,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...loaders import TextualInversionLoaderMixin +from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor @@ -138,13 +138,20 @@ def prepare_mask_and_masked_image(image, mask): return mask, masked_image -class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin): +class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 53aefd6b8bdb..1c8377c7e54e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -22,7 +22,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...loaders import FromCkptMixin, TextualInversionLoaderMixin +from ...loaders import FromCkptMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -82,13 +82,23 @@ def preprocess_mask(mask, scale_factor=8): return mask -class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline, TextualInversionLoaderMixin, FromCkptMixin): +class StableDiffusionInpaintPipelineLegacy( + DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromCkptMixin +): r""" Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
From 3dfdb0b592710037b3fa03073587d8debb9d7c1b Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 16:31:32 +0200 Subject: [PATCH 07/11] up --- @ | 230 -------------------------------------------------------------- 1 file changed, 230 deletions(-) delete mode 100644 @ diff --git a/@ b/@ deleted file mode 100644 index 40029fcecfd1..000000000000 --- a/@ +++ /dev/null @@ -1,230 +0,0 @@ -__version__ = "0.16.0.dev0" - -from .configuration_utils import ConfigMixin -from .utils import ( - OptionalDependencyNotAvailable, - is_flax_available, - is_inflect_available, - is_k_diffusion_available, - is_k_diffusion_version, - is_librosa_available, - is_note_seq_available, - is_onnx_available, - is_scipy_available, - is_torch_available, - is_transformers_available, - is_transformers_version, - is_unidecode_available, - logging, -) - - -try: - if not is_onnx_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_onnx_objects import * # noqa F403 -else: - from .pipelines import OnnxRuntimeModel - -try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_pt_objects import * # noqa F403 -else: - from .models import ( - AutoencoderKL, - ControlNetModel, - ModelMixin, - PriorTransformer, - T5FilmDecoder, - Transformer2DModel, - UNet1DModel, - UNet2DConditionModel, - UNet2DModel, - UNet3DConditionModel, - VQModel, - ) - from .optimization import ( - get_constant_schedule, - get_constant_schedule_with_warmup, - get_cosine_schedule_with_warmup, - get_cosine_with_hard_restarts_schedule_with_warmup, - get_linear_schedule_with_warmup, - get_polynomial_decay_schedule_with_warmup, - get_scheduler, - ) - from .pipelines import ( - AudioPipelineOutput, - DanceDiffusionPipeline, - DDIMPipeline, - DDPMPipeline, - DiffusionPipeline, - DiTPipeline, - ImagePipelineOutput, - KarrasVePipeline, - LDMPipeline, - LDMSuperResolutionPipeline, - 
PNDMPipeline, - RePaintPipeline, - ScoreSdeVePipeline, - ) - from .schedulers import ( - DDIMInverseScheduler, - DDIMScheduler, - DDPMScheduler, - DEISMultistepScheduler, - DPMSolverMultistepScheduler, - DPMSolverSinglestepScheduler, - EulerAncestralDiscreteScheduler, - EulerDiscreteScheduler, - HeunDiscreteScheduler, - IPNDMScheduler, - KarrasVeScheduler, - KDPM2AncestralDiscreteScheduler, - KDPM2DiscreteScheduler, - PNDMScheduler, - RePaintScheduler, - SchedulerMixin, - ScoreSdeVeScheduler, - UnCLIPScheduler, - UniPCMultistepScheduler, - VQDiffusionScheduler, - ) - from .training_utils import EMAModel - -try: - if not (is_torch_available() and is_scipy_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_torch_and_scipy_objects import * # noqa F403 -else: - from .schedulers import LMSDiscreteScheduler - - -try: - if not (is_torch_available() and is_transformers_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_torch_and_transformers_objects import * # noqa F403 -else: - from .pipelines import ( - AltDiffusionImg2ImgPipeline, - AltDiffusionPipeline, - AudioLDMPipeline, - CycleDiffusionPipeline, - LDMTextToImagePipeline, - PaintByExamplePipeline, - SemanticStableDiffusionPipeline, - StableDiffusionAttendAndExcitePipeline, - StableDiffusionControlNetPipeline, - StableDiffusionDepth2ImgPipeline, - StableDiffusionImageVariationPipeline, - StableDiffusionImg2ImgPipeline, - StableDiffusionInpaintPipeline, - StableDiffusionInpaintPipelineLegacy, - StableDiffusionInstructPix2PixPipeline, - StableDiffusionLatentUpscalePipeline, - StableDiffusionModelEditingPipeline, - StableDiffusionPanoramaPipeline, - StableDiffusionPipeline, - StableDiffusionPipelineSafe, - StableDiffusionPix2PixZeroPipeline, - StableDiffusionSAGPipeline, - StableDiffusionUpscalePipeline, - StableUnCLIPImg2ImgPipeline, - StableUnCLIPPipeline, - TextToVideoSDPipeline, - 
TextToVideoZeroPipeline, - UnCLIPImageVariationPipeline, - UnCLIPPipeline, - VersatileDiffusionDualGuidedPipeline, - VersatileDiffusionImageVariationPipeline, - VersatileDiffusionPipeline, - VersatileDiffusionTextToImagePipeline, - VQDiffusionPipeline, - ) - -try: - if not (is_torch_available() and is_transformers_available() and is_k_diffusion_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_torch_and_transformers_and_k_diffusion_objects import * # noqa F403 -else: - from .pipelines import StableDiffusionKDiffusionPipeline - -try: - if not (is_torch_available() and is_transformers_available() and is_onnx_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_torch_and_transformers_and_onnx_objects import * # noqa F403 -else: - from .pipelines import ( - OnnxStableDiffusionImg2ImgPipeline, - OnnxStableDiffusionInpaintPipeline, - OnnxStableDiffusionInpaintPipelineLegacy, - OnnxStableDiffusionPipeline, - OnnxStableDiffusionUpscalePipeline, - StableDiffusionOnnxPipeline, - ) - -try: - if not (is_torch_available() and is_librosa_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_torch_and_librosa_objects import * # noqa F403 -else: - from .pipelines import AudioDiffusionPipeline, Mel - -try: - if not (is_transformers_available() and is_torch_available() and is_note_seq_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_transformers_and_torch_and_note_seq_objects import * # noqa F403 -else: - from .pipelines import SpectrogramDiffusionPipeline - -try: - if not is_flax_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_flax_objects import * # noqa F403 -else: - from .models.controlnet_flax import FlaxControlNetModel - from .models.modeling_flax_utils import 
FlaxModelMixin - from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel - from .models.vae_flax import FlaxAutoencoderKL - from .pipelines import FlaxDiffusionPipeline - from .schedulers import ( - FlaxDDIMScheduler, - FlaxDDPMScheduler, - FlaxDPMSolverMultistepScheduler, - FlaxKarrasVeScheduler, - FlaxLMSDiscreteScheduler, - FlaxPNDMScheduler, - FlaxSchedulerMixin, - FlaxScoreSdeVeScheduler, - ) - - -try: - if not (is_flax_available() and is_transformers_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_flax_and_transformers_objects import * # noqa F403 -else: - from .pipelines import ( - FlaxStableDiffusionControlNetPipeline, - FlaxStableDiffusionImg2ImgPipeline, - FlaxStableDiffusionInpaintPipeline, - FlaxStableDiffusionPipeline, - ) - -try: - if not (is_note_seq_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils.dummy_note_seq_objects import * # noqa F403 -else: - from .pipelines import MidiProcessor From 8a85f8ac7f7086a9cb6af5bee2a11ec31c9145d4 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 15:32:16 +0100 Subject: [PATCH 08/11] Apply suggestions from code review --- docs/source/en/api/pipelines/stable_diffusion/text2img.mdx | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx b/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx index 0478e516eaf1..6b8d53bf6510 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx @@ -39,8 +39,6 @@ Available Checkpoints are: - disable_xformers_memory_efficient_attention - enable_vae_tiling - disable_vae_tiling - - from_ckpt - - load_attn_procs [[autodoc]] FlaxStableDiffusionPipeline - all From 1b11d405b7d616d3a40afe6493a15665a93bc614 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 16:40:51 +0200 
Subject: [PATCH 09/11] finish docs --- .../api/pipelines/stable_diffusion/controlnet.mdx | 3 +++ .../api/pipelines/stable_diffusion/depth2img.mdx | 5 ++++- .../en/api/pipelines/stable_diffusion/img2img.mdx | 6 +++++- .../en/api/pipelines/stable_diffusion/inpaint.mdx | 5 ++++- .../en/api/pipelines/stable_diffusion/pix2pix.mdx | 3 +++ .../api/pipelines/stable_diffusion/text2img.mdx | 6 ++++-- .../alt_diffusion/pipeline_alt_diffusion.py | 8 ++++++++ .../pipeline_alt_diffusion_img2img.py | 8 ++++++++ .../utils/dummy_torch_and_transformers_objects.py | 15 --------------- 9 files changed, 39 insertions(+), 20 deletions(-) diff --git a/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx b/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx index af859177c002..79fedb36aa14 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx @@ -308,6 +308,9 @@ All checkpoints can be found under the authors' namespace [lllyasviel](https://h - disable_vae_slicing - enable_xformers_memory_efficient_attention - disable_xformers_memory_efficient_attention + - load_textual_inversion + - load_lora_weights + - save_lora_weights ## FlaxStableDiffusionControlNetPipeline [[autodoc]] FlaxStableDiffusionControlNetPipeline diff --git a/docs/source/en/api/pipelines/stable_diffusion/depth2img.mdx b/docs/source/en/api/pipelines/stable_diffusion/depth2img.mdx index c46576ff2887..a91167bac58c 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/depth2img.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/depth2img.mdx @@ -30,4 +30,7 @@ Available Checkpoints are: - enable_attention_slicing - disable_attention_slicing - enable_xformers_memory_efficient_attention - - disable_xformers_memory_efficient_attention \ No newline at end of file + - disable_xformers_memory_efficient_attention + - load_textual_inversion + - load_lora_weights + - save_lora_weights diff --git 
a/docs/source/en/api/pipelines/stable_diffusion/img2img.mdx b/docs/source/en/api/pipelines/stable_diffusion/img2img.mdx index 09bfb853f9c9..7959c588608b 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/img2img.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/img2img.mdx @@ -30,7 +30,11 @@ proposed by Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan - disable_attention_slicing - enable_xformers_memory_efficient_attention - disable_xformers_memory_efficient_attention + - load_textual_inversion + - from_ckpt + - load_lora_weights + - save_lora_weights [[autodoc]] FlaxStableDiffusionImg2ImgPipeline - all - - __call__ \ No newline at end of file + - __call__ diff --git a/docs/source/en/api/pipelines/stable_diffusion/inpaint.mdx b/docs/source/en/api/pipelines/stable_diffusion/inpaint.mdx index 33e84a63261f..39e5ae0fd37d 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/inpaint.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/inpaint.mdx @@ -31,7 +31,10 @@ Available checkpoints are: - disable_attention_slicing - enable_xformers_memory_efficient_attention - disable_xformers_memory_efficient_attention + - load_textual_inversion + - load_lora_weights + - save_lora_weights [[autodoc]] FlaxStableDiffusionInpaintPipeline - all - - __call__ \ No newline at end of file + - __call__ diff --git a/docs/source/en/api/pipelines/stable_diffusion/pix2pix.mdx b/docs/source/en/api/pipelines/stable_diffusion/pix2pix.mdx index 42cd4b896b2e..d01f1df23385 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/pix2pix.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/pix2pix.mdx @@ -68,3 +68,6 @@ images[0].save("snowy_mountains.png") [[autodoc]] StableDiffusionInstructPix2PixPipeline - __call__ - all + - load_textual_inversion + - load_lora_weights + - save_lora_weights diff --git a/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx b/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx index 
0478e516eaf1..ce78434fdbaa 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx @@ -39,8 +39,10 @@ Available Checkpoints are: - disable_xformers_memory_efficient_attention - enable_vae_tiling - disable_vae_tiling - - from_ckpt - - load_attn_procs + - load_textual_inversion + - from_ckpt + - load_lora_weights + - save_lora_weights [[autodoc]] FlaxStableDiffusionPipeline - all diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index bf314b91116e..ff9474ffd43a 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -57,6 +57,14 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin): This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 86fc47f424e9..dee4a91924f7 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -96,6 +96,14 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. 
diff --git a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py index 8a521457f2e3..bda56d2ae8ae 100644 --- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py +++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -2,21 +2,6 @@ from ..utils import DummyObject, requires_backends -class TextualInversionLoaderMixin(metaclass=DummyObject): - _backends = ["torch", "transformers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "transformers"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "transformers"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "transformers"]) - - class AltDiffusionImg2ImgPipeline(metaclass=DummyObject): _backends = ["torch", "transformers"] From 3ab3e56b76ce5fdf488a73648eda166a0e8b7984 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19 Apr 2023 16:45:19 +0200 Subject: [PATCH 10/11] rename --- src/diffusers/loaders.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index b8c8e483aaac..3133da117390 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1062,14 +1062,14 @@ class FromCkptMixin: into the respective classes.""" @classmethod - def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): + def from_ckpt(cls, pretrained_model_link_or_path, **kwargs): r""" Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights saved in the original .ckpt format. The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). Parameters: - pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*): Can be either: - A link to the .ckpt file on the Hub. 
Should be in the format `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>"` @@ -1171,7 +1171,7 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) pipeline_name = cls.__name__ - file_extension = pretrained_model_name_or_path.rsplit(".", 1)[-1] + file_extension = pretrained_model_link_or_path.rsplit(".", 1)[-1] from_safetensors = file_extension == "safetensors" if from_safetensors and use_safetensors is True: @@ -1201,11 +1201,11 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): # remove huggingface url for prefix in ["https://huggingface.co/", "huggingface.co/", "hf.co/", "https://hf.co/"]: - if pretrained_model_name_or_path.startswith(prefix): - pretrained_model_name_or_path = pretrained_model_name_or_path[len(prefix) :] + if pretrained_model_link_or_path.startswith(prefix): + pretrained_model_link_or_path = pretrained_model_link_or_path[len(prefix) :] # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained - ckpt_path = Path(pretrained_model_name_or_path) + ckpt_path = Path(pretrained_model_link_or_path) if not ckpt_path.is_file(): # get repo_id and (potentially nested) file path of ckpt in repo repo_id = str(Path().joinpath(*ckpt_path.parts[:2])) @@ -1217,7 +1217,7 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): if file_path.startswith("main/"): file_path = file_path[len("main/") :] - pretrained_model_name_or_path = hf_hub_download( + pretrained_model_link_or_path = hf_hub_download( repo_id, filename=file_path, cache_dir=cache_dir, @@ -1230,7 +1230,7 @@ def from_ckpt(cls, pretrained_model_name_or_path, **kwargs): ) pipe = download_from_original_stable_diffusion_ckpt( - pretrained_model_name_or_path, + pretrained_model_link_or_path, pipeline_class=cls, model_type=model_type, stable_unclip=stable_unclip, From 3fbf3c0b5537165a644d2317208ac851b29fe74d Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 19
Apr 2023 17:48:57 +0200 Subject: [PATCH 11/11] make style --- .../en/api/pipelines/stable_diffusion/controlnet.mdx | 2 -- .../pipeline_stable_diffusion_depth2img.py | 11 +++++++++-- .../pipeline_stable_diffusion_instruct_pix2pix.py | 11 +++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx b/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx index 79fedb36aa14..dabd3ded31ce 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/controlnet.mdx @@ -309,8 +309,6 @@ All checkpoints can be found under the authors' namespace [lllyasviel](https://h - enable_xformers_memory_efficient_attention - disable_xformers_memory_efficient_attention - load_textual_inversion - - load_lora_weights - - save_lora_weights ## FlaxStableDiffusionControlNetPipeline [[autodoc]] FlaxStableDiffusionControlNetPipeline diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 4fe117ba120b..c4f9ae59a4e9 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -23,7 +23,7 @@ from transformers import CLIPTextModel, CLIPTokenizer, DPTFeatureExtractor, DPTForDepthEstimation from ...configuration_utils import FrozenDict -from ...loaders import TextualInversionLoaderMixin +from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import PIL_INTERPOLATION, deprecate, is_accelerate_available, logging, randn_tensor @@ -55,13 +55,20 @@ def preprocess(image): return image -class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin): +class 
StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-guided image to image generation using Stable Diffusion. This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index f7999a08dc9b..49944cdcd636 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -20,7 +20,7 @@ import torch from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer -from ...loaders import TextualInversionLoaderMixin +from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -61,13 +61,20 @@ def preprocess(image): return image -class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversionLoaderMixin): +class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for pixel-level image editing by following text instructions. Based on Stable Diffusion. 
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + In addition the pipeline inherits the following loading methods: + - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] + - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] + + as well as the following saving methods: + - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] + Args: vae ([`AutoencoderKL`]): Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.