From ab9ef6ab83e6154d78b73f18bd939dcc7c04833b Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 9 Feb 2023 15:58:31 +0200 Subject: [PATCH 01/27] [Variant] Add variant loading mechanism --- + | 1015 +++++++++++++++++++++ src/diffusers/models/modeling_utils.py | 29 +- src/diffusers/pipelines/pipeline_utils.py | 129 ++- 3 files changed, 1125 insertions(+), 48 deletions(-) create mode 100644 + diff --git a/+ b/+ new file mode 100644 index 000000000000..a8b18a60f250 --- /dev/null +++ b/+ @@ -0,0 +1,1015 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import inspect +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import numpy as np +import PIL +import torch +from huggingface_hub import model_info, snapshot_download +from packaging import version +from PIL import Image +from tqdm.auto import tqdm + +import diffusers + +from ..configuration_utils import ConfigMixin +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, _add_variant +from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME +from ..utils import ( + CONFIG_NAME, + DIFFUSERS_CACHE, + FLAX_WEIGHTS_NAME, + HF_HUB_OFFLINE, + ONNX_WEIGHTS_NAME, + WEIGHTS_NAME, + SAFETENSORS_WEIGHTS_NAME, + BaseOutput, + deprecate, + get_class_from_dynamic_module, + http_user_agent, + is_accelerate_available, + is_safetensors_available, + is_torch_version, + is_transformers_available, + logging, +) + + +if is_transformers_available(): + import transformers + from transformers import PreTrainedModel + + +INDEX_FILE = "diffusion_pytorch_model.bin" +CUSTOM_PIPELINE_FILE_NAME = "pipeline.py" +DUMMY_MODULES_FOLDER = "diffusers.utils" +TRANSFORMERS_DUMMY_MODULES_FOLDER = "transformers.utils" + + +logger = logging.get_logger(__name__) + + +LOADABLE_CLASSES = { + "diffusers": { + "ModelMixin": ["save_pretrained", "from_pretrained"], + "SchedulerMixin": ["save_pretrained", "from_pretrained"], + "DiffusionPipeline": ["save_pretrained", "from_pretrained"], + "OnnxRuntimeModel": ["save_pretrained", "from_pretrained"], + }, + "transformers": { + "PreTrainedTokenizer": ["save_pretrained", "from_pretrained"], + "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"], + "PreTrainedModel": ["save_pretrained", "from_pretrained"], + "FeatureExtractionMixin": ["save_pretrained", "from_pretrained"], + "ProcessorMixin": ["save_pretrained", "from_pretrained"], + "ImageProcessingMixin": ["save_pretrained", "from_pretrained"], + }, + "onnxruntime.training": { + "ORTModule": ["save_pretrained", "from_pretrained"], + }, +} + +ALL_IMPORTABLE_CLASSES = {} +for library in LOADABLE_CLASSES: + ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES[library]) + + +@dataclass +class ImagePipelineOutput(BaseOutput): + """ + Output class for image pipelines. 
+
+    Args:
+        images (`List[PIL.Image.Image]` or `np.ndarray`):
+            List of denoised PIL images of length `batch_size` or a numpy array of shape `(batch_size, height, width,
+            num_channels)`. PIL images or the numpy array represent the denoised images of the diffusion pipeline.
+    """
+
+    images: Union[List[PIL.Image.Image], np.ndarray]
+
+
+@dataclass
+class AudioPipelineOutput(BaseOutput):
+    """
+    Output class for audio pipelines.
+
+    Args:
+        audios (`np.ndarray`):
+            List of denoised samples of shape `(batch_size, num_channels, sample_rate)`. The numpy array represents the
+            denoised audio samples of the diffusion pipeline.
+    """
+
+    audios: np.ndarray
+
+
+def is_safetensors_compatible(filenames, variant=None) -> bool:
+    pt_filenames = set(filename for filename in filenames if filename.endswith(".bin"))
+    is_safetensors_compatible = any(file.endswith(".safetensors") for file in filenames)
+    variant = f".{variant}" if variant is not None else ""
+
+    for pt_filename in pt_filenames:
+        prefix, raw = os.path.split(pt_filename)
+        if raw == f"pytorch_model{variant}.bin":
+            # transformers specific
+            sf_filename = os.path.join(prefix, f"model{variant}.safetensors")
+        else:
+            sf_filename = pt_filename[: -len(".bin")] + ".safetensors"
+        if is_safetensors_compatible and sf_filename not in filenames:
+            logger.warning(f"{sf_filename} not found")
+            is_safetensors_compatible = False
+    return is_safetensors_compatible
+
+
+def variant_compatible_siblings(info, variant=None) -> set:
+    variant = variant or ""
+
+    filenames = set(sibling.rfilename for sibling in info.siblings)
+    save_formats = ["bin", "safetensors", "msgpack", "onnx"]
+
+    variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats))
+    non_variant_filenames = set(
+        f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats))
+    )
+
+    usable_filenames = set(variant_filenames)
+    for f in non_variant_filenames:
+        variant_filename = f"{f.split('.')[0]}.{variant}.{f.split('.')[1]}"
+        if variant_filename not in usable_filenames:
+            usable_filenames.add(f)
+
+    return usable_filenames
+
+
+class DiffusionPipeline(ConfigMixin):
+    r"""
+    Base class for all pipelines.
+
+    [`DiffusionPipeline`] takes care of storing all components (models, schedulers, processors) for diffusion pipelines
+    and handles methods for loading, downloading and saving models as well as a few methods common to all pipelines to:
+
+        - move all PyTorch modules to the device of your choice
+        - enable/disable the progress bar for the denoising iteration
+
+    Class attributes:
+
+        - **config_name** (`str`) -- name of the config file that will store the class and module names of all
+          components of the diffusion pipeline.
+        - **_optional_components** (List[`str`]) -- list of all components that are optional so they don't have to be
+          passed for the pipeline to function (should be overridden by subclasses).
+ """ + config_name = "model_index.json" + _optional_components = [] + + def register_modules(self, **kwargs): + # import it here to avoid circular import + from diffusers import pipelines + + for name, module in kwargs.items(): + # retrieve library + if module is None: + register_dict = {name: (None, None)} + else: + library = module.__module__.split(".")[0] + + # check if the module is a pipeline module + pipeline_dir = module.__module__.split(".")[-2] if len(module.__module__.split(".")) > 2 else None + path = module.__module__.split(".") + is_pipeline_module = pipeline_dir in path and hasattr(pipelines, pipeline_dir) + + # if library is not in LOADABLE_CLASSES, then it is a custom module. + # Or if it's a pipeline module, then the module is inside the pipeline + # folder so we set the library to module name. + if library not in LOADABLE_CLASSES or is_pipeline_module: + library = pipeline_dir + + # retrieve class_name + class_name = module.__class__.__name__ + + register_dict = {name: (library, class_name)} + + # save model index config + self.register_to_config(**register_dict) + + # set models + setattr(self, name, module) + + def save_pretrained( + self, + save_directory: Union[str, os.PathLike], + safe_serialization: bool = False, + variant: Optional[str] = None, + ): + """ + Save all variables of the pipeline that can be saved and loaded as well as the pipelines configuration file to + a directory. A pipeline variable can be saved and loaded if its class implements both a save and loading + method. The pipeline can easily be re-loaded using the `[`~DiffusionPipeline.from_pretrained`]` class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to which to save. Will be created if it doesn't exist. + safe_serialization (`bool`, *optional*, defaults to `False`): + Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + variant (`str`, *optional*): + If specified, weights are saved in the format pytorch_model..bin. 
+ """ + self.save_config(save_directory) + + model_index_dict = dict(self.config) + model_index_dict.pop("_class_name") + model_index_dict.pop("_diffusers_version") + model_index_dict.pop("_module", None) + + expected_modules, optional_kwargs = self._get_signature_keys(self) + + def is_saveable_module(name, value): + if name not in expected_modules: + return False + if name in self._optional_components and value[0] is None: + return False + return True + + model_index_dict = {k: v for k, v in model_index_dict.items() if is_saveable_module(k, v)} + + for pipeline_component_name in model_index_dict.keys(): + sub_model = getattr(self, pipeline_component_name) + model_cls = sub_model.__class__ + + save_method_name = None + # search for the model's base class in LOADABLE_CLASSES + for library_name, library_classes in LOADABLE_CLASSES.items(): + library = importlib.import_module(library_name) + for base_class, save_load_methods in library_classes.items(): + class_candidate = getattr(library, base_class, None) + if class_candidate is not None and issubclass(model_cls, class_candidate): + # if we found a suitable base class in LOADABLE_CLASSES then grab its save method + save_method_name = save_load_methods[0] + break + if save_method_name is not None: + break + + save_method = getattr(sub_model, save_method_name) + + # Call the save method with the argument safe_serialization only if it's supported + save_method_signature = inspect.signature(save_method) + save_method_accept_safe = "safe_serialization" in save_method_signature.parameters + save_method_accept_variant = "variant" in save_method_signature.parameters + + save_kwargs = {} + if save_method_accept_safe: + save_kwargs["safe_serialization"] = safe_serialization + if save_method_accept_variant: + save_kwargs["variant"] = variant + + save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs) + + def to(self, torch_device: Optional[Union[str, torch.device]] = None): + if torch_device is None: + return self + + module_names, _, _ = self.extract_init_dict(dict(self.config)) + for name in module_names.keys(): + module = getattr(self, name) + if isinstance(module, torch.nn.Module): + if module.dtype == torch.float16 and str(torch_device) in ["cpu"]: + logger.warning( + "Pipelines loaded with `torch_dtype=torch.float16` cannot run with `cpu` device. It" + " is not recommended to move them to `cpu` as running them will fail. Please make" + " sure to use an accelerator to run the pipeline in inference, due to the lack of" + " support for`float16` operations on this device in PyTorch. Please, remove the" + " `torch_dtype=torch.float16` argument, or use another device for inference." + ) + module.to(torch_device) + return self + + @property + def device(self) -> torch.device: + r""" + Returns: + `torch.device`: The torch device on which the pipeline is located. + """ + module_names, _, _ = self.extract_init_dict(dict(self.config)) + for name in module_names.keys(): + module = getattr(self, name) + if isinstance(module, torch.nn.Module): + return module.device + return torch.device("cpu") + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs): + r""" + Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights. + + The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). 
+
+        The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
+        pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
+        task.
+
+        The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those
+        weights are discarded.
+
+        Parameters:
+            pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*):
+                Can be either:
+
+                    - A string, the *repo id* of a pretrained pipeline hosted inside a model repo on
+                      https://huggingface.co/. Valid repo ids have to be located under a user or organization name,
+                      like `CompVis/ldm-text2im-large-256`.
+                    - A path to a *directory* containing pipeline weights saved using
+                      [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`.
+            torch_dtype (`str` or `torch.dtype`, *optional*):
+                Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the dtype
+                will be automatically derived from the model's weights.
+            custom_pipeline (`str`, *optional*):
+
+                <Tip warning={true}>
+
+                This is an experimental feature and is likely to change in the future.
+
+                </Tip>
+
+                Can be either:
+
+                    - A string, the *repo id* of a custom pipeline hosted inside a model repo on
+                      https://huggingface.co/. Valid repo ids have to be located under a user or organization name,
+                      like `hf-internal-testing/diffusers-dummy-pipeline`.
+
+                      <Tip>
+
+                      It is required that the model repo has a file, called `pipeline.py` that defines the custom
+                      pipeline.
+
+                      </Tip>
+
+                    - A string, the *file name* of a community pipeline hosted on GitHub under
+                      https://github.com/huggingface/diffusers/tree/main/examples/community. Valid file names have to
+                      match exactly the file name without `.py` located under the above link, *e.g.*
+                      `clip_guided_stable_diffusion`.
+
+                      <Tip>
+
+                      Community pipelines are always loaded from the current `main` branch of GitHub.
+
+                      </Tip>
+
+                    - A path to a *directory* containing a custom pipeline, e.g., `./my_pipeline_directory/`.
+
+                      <Tip>
+
+                      It is required that the directory has a file, called `pipeline.py` that defines the custom
+                      pipeline.
+
+                      </Tip>
+
+                For more information on how to load and create custom pipelines, please have a look at [Loading and
+                Adding Custom
+                Pipelines](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview)
+
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
+                cached versions if they exist.
+            resume_download (`bool`, *optional*, defaults to `False`):
+                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
+                incompletely downloaded files are deleted.
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
+            output_loading_info (`bool`, *optional*, defaults to `False`):
+                Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
+            local_files_only (`bool`, *optional*, defaults to `False`):
+                Whether or not to only look at local files (i.e., do not try to download the model).
+            use_auth_token (`str` or *bool*, *optional*):
+                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
+                when running `huggingface-cli login` (stored in `~/.huggingface`).
revision (`str`, *optional*, defaults to `"main"`):
+                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
+                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
+                identifier allowed by git.
+            custom_revision (`str`, *optional*, defaults to `"main"` when loading from the Hub and to local version of `diffusers` when loading from GitHub):
+                The specific model version to use. It can be a branch name, a tag name, or a commit id similar to
+                `revision` when loading a custom pipeline from the Hub. It can be a diffusers version when loading a
+                custom pipeline from GitHub.
+            mirror (`str`, *optional*):
+                Mirror source to accelerate downloads in China. If you are from China and have an accessibility
+                problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety.
+                Please refer to the mirror site for more information.
+            device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
+                A map that specifies where each submodule should go. It doesn't need to be refined to each
+                parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the
+                same device.
+
+                To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For
+                more information about each option see [designing a device
+                map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
+                Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
+                also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
+                model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
+                setting this argument to `True` will raise an error.
+            return_cached_folder (`bool`, *optional*, defaults to `False`):
+                If set to `True`, the path to the downloaded cached folder will be returned in addition to the loaded
+                pipeline.
+            kwargs (remaining dictionary of keyword arguments, *optional*):
+                Can be used to overwrite load- and saveable variables, *i.e.* the pipeline components, of the specific
+                pipeline class. The overwritten components are then directly passed to the pipeline's `__init__`
+                method. See example below for more information.
+            variant (`str`, *optional*):
+                If specified, load weights from a `variant` filename, *e.g.* `pytorch_model.<variant>.bin`. `variant`
+                is ignored when using `from_flax`.
+
+        <Tip>
+
+        It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated
+        models](https://huggingface.co/docs/hub/models-gated#gated-models), *e.g.* `"runwayml/stable-diffusion-v1-5"`
+
+        </Tip>
+
+        <Tip>
+
+        Activate the special ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use
+        this method in a firewalled environment.
+
+        </Tip>
+
+        Examples:
+
+        ```py
+        >>> from diffusers import DiffusionPipeline
+
+        >>> # Download pipeline from huggingface.co and cache.
>>> pipeline = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
+
+        >>> # Download pipeline that requires an authorization token
+        >>> # For more information on access tokens, please refer to this section of
+        >>> # the documentation: https://huggingface.co/docs/hub/security-tokens
+        >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+
+        >>> # Use a different scheduler
+        >>> from diffusers import LMSDiscreteScheduler
+
+        >>> scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config)
+        >>> pipeline.scheduler = scheduler
+        ```
+        """
+        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
+        resume_download = kwargs.pop("resume_download", False)
+        force_download = kwargs.pop("force_download", False)
+        proxies = kwargs.pop("proxies", None)
+        local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
+        use_auth_token = kwargs.pop("use_auth_token", None)
+        revision = kwargs.pop("revision", None)
+        from_flax = kwargs.pop("from_flax", False)
+        torch_dtype = kwargs.pop("torch_dtype", None)
+        custom_pipeline = kwargs.pop("custom_pipeline", None)
+        custom_revision = kwargs.pop("custom_revision", None)
+        provider = kwargs.pop("provider", None)
+        sess_options = kwargs.pop("sess_options", None)
+        device_map = kwargs.pop("device_map", None)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
+        return_cached_folder = kwargs.pop("return_cached_folder", False)
+        variant = kwargs.pop("variant", None)
+
+        # 1. Download the checkpoints and configs
+        # use snapshot download here to get it working from from_pretrained
+        if not os.path.isdir(pretrained_model_name_or_path):
+            config_dict = cls.load_config(
+                pretrained_model_name_or_path,
+                cache_dir=cache_dir,
+                resume_download=resume_download,
+                force_download=force_download,
+                proxies=proxies,
+                local_files_only=local_files_only,
+                use_auth_token=use_auth_token,
+                revision=revision,
+            )
+
+            # retrieve all folder_names that contain relevant files
+            folder_names = [k for k in config_dict.keys() if not k.startswith("_")]
+
+            if not local_files_only:
+                info = model_info(
+                    pretrained_model_name_or_path,
+                    use_auth_token=use_auth_token,
+                    revision=revision,
+                )
+                model_filenames = variant_compatible_siblings(info, variant=variant)
+                model_folder_names = set([os.path.split(f)[0] for f in model_filenames])
+
+                # allow all patterns from non-model folders
+                # this enables downloading schedulers, tokenizers, ...
+                allow_patterns = [os.path.join(k, "*") for k in folder_names if k not in model_folder_names]
+                # also allow downloading the configs and the resolved model weights
+                allow_patterns += [os.path.join(k, "*.json") for k in model_folder_names]
+                allow_patterns += list(model_filenames)
+                allow_patterns += [
+                    SCHEDULER_CONFIG_NAME,
+                    CONFIG_NAME,
+                    cls.config_name,
+                    CUSTOM_PIPELINE_FILE_NAME,
+                ]
+
+                if from_flax:
+                    ignore_patterns = ["*.bin", "*.safetensors"]
+                elif is_safetensors_available() and is_safetensors_compatible(model_filenames):
+                    ignore_patterns = ["*.bin", "*.msgpack"]
+                else:
+                    # as a safety mechanism we also don't download safetensors if
+                    # not all safetensors files are there
+                    ignore_patterns = ["*.safetensors", "*.msgpack"]
+            else:
+                # allow everything since it has to be downloaded anyways
+                # i. add all files from subfolders
+                allow_patterns = [os.path.join(k, "*") for k in folder_names]
+
+                # ii. add all files that can be loaded from the root directory
+                allow_patterns += [
+                    WEIGHTS_NAME,
+                    _add_variant(WEIGHTS_NAME, variant),
+                    SAFETENSORS_WEIGHTS_NAME,
+                    _add_variant(SAFETENSORS_WEIGHTS_NAME, variant),
+                    SCHEDULER_CONFIG_NAME,
+                    CONFIG_NAME,
+                    ONNX_WEIGHTS_NAME,
+                    FLAX_WEIGHTS_NAME,
+                    cls.config_name,
+                    CUSTOM_PIPELINE_FILE_NAME,
+                ]
+
+                # make sure we don't load incompatible weight formats
+                if from_flax:
+                    ignore_patterns = ["*.bin", "*.safetensors"]
+                else:
+                    ignore_patterns = ["*.msgpack"]
+
+            if cls != DiffusionPipeline:
+                requested_pipeline_class = cls.__name__
+            else:
+                requested_pipeline_class = config_dict.get("_class_name", cls.__name__)
+            user_agent = {"pipeline_class": requested_pipeline_class}
+            if custom_pipeline is not None and not custom_pipeline.endswith(".py"):
+                user_agent["custom_pipeline"] = custom_pipeline
+
+            user_agent = http_user_agent(user_agent)
+
+            # download all allow_patterns
+            cached_folder = snapshot_download(
+                pretrained_model_name_or_path,
+                cache_dir=cache_dir,
+                resume_download=resume_download,
+                proxies=proxies,
+                local_files_only=local_files_only,
+                use_auth_token=use_auth_token,
+                revision=revision,
+                allow_patterns=allow_patterns,
+                ignore_patterns=ignore_patterns,
+                user_agent=user_agent,
+            )
+        else:
+            cached_folder = pretrained_model_name_or_path
+            config_dict = cls.load_config(cached_folder)
+
+        # 2.
Load the pipeline class, if using custom module then load it from the hub + # if we load from explicit class, let's use it + if custom_pipeline is not None: + if custom_pipeline.endswith(".py"): + path = Path(custom_pipeline) + # decompose into folder & file + file_name = path.name + custom_pipeline = path.parent.absolute() + else: + file_name = CUSTOM_PIPELINE_FILE_NAME + + pipeline_class = get_class_from_dynamic_module( + custom_pipeline, module_file=file_name, cache_dir=cache_dir, revision=custom_revision + ) + elif cls != DiffusionPipeline: + pipeline_class = cls + else: + diffusers_module = importlib.import_module(cls.__module__.split(".")[0]) + pipeline_class = getattr(diffusers_module, config_dict["_class_name"]) + + # To be removed in 1.0.0 + if pipeline_class.__name__ == "StableDiffusionInpaintPipeline" and version.parse( + version.parse(config_dict["_diffusers_version"]).base_version + ) <= version.parse("0.5.1"): + from diffusers import StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy + + pipeline_class = StableDiffusionInpaintPipelineLegacy + + deprecation_message = ( + "You are using a legacy checkpoint for inpainting with Stable Diffusion, therefore we are loading the" + f" {StableDiffusionInpaintPipelineLegacy} class instead of {StableDiffusionInpaintPipeline}. For" + " better inpainting results, we strongly suggest using Stable Diffusion's official inpainting" + " checkpoint: https://huggingface.co/runwayml/stable-diffusion-inpainting instead or adapting your" + f" checkpoint {pretrained_model_name_or_path} to the format of" + " https://huggingface.co/runwayml/stable-diffusion-inpainting. Note that we do not actively maintain" + " the {StableDiffusionInpaintPipelineLegacy} class and will likely remove it in version 1.0.0." + ) + deprecate("StableDiffusionInpaintPipelineLegacy", "1.0.0", deprecation_message, standard_warn=False) + + # some modules can be passed directly to the init + # in this case they are already instantiated in `kwargs` + # extract them here + expected_modules, optional_kwargs = cls._get_signature_keys(pipeline_class) + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + + init_dict, unused_kwargs, _ = pipeline_class.extract_init_dict(config_dict, **kwargs) + + # define init kwargs + init_kwargs = {k: init_dict.pop(k) for k in optional_kwargs if k in init_dict} + init_kwargs = {**init_kwargs, **passed_pipe_kwargs} + + # remove `null` components + def load_module(name, value): + if value[0] is None: + return False + if name in passed_class_obj and passed_class_obj[name] is None: + return False + return True + + init_dict = {k: v for k, v in init_dict.items() if load_module(k, v)} + + # Special case: safety_checker must be loaded separately when using `from_flax` + if from_flax and "safety_checker" in init_dict and "safety_checker" not in passed_class_obj: + raise NotImplementedError( + "The safety checker cannot be automatically loaded when loading weights `from_flax`." + " Please, pass `safety_checker=None` to `from_pretrained`, and load the safety checker" + " separately if you need it." + ) + + if len(unused_kwargs) > 0: + logger.warning( + f"Keyword arguments {unused_kwargs} are not expected by {pipeline_class.__name__} and will be ignored." 
+ ) + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if device_map is not None and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `device_map=None`." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + if low_cpu_mem_usage is False and device_map is not None: + raise ValueError( + f"You cannot set `low_cpu_mem_usage` to False while using device_map={device_map} for loading and" + " dispatching. Please make sure to set `low_cpu_mem_usage=True`." + ) + + # import it here to avoid circular import + from diffusers import pipelines + + # 3. Load each module in the pipeline + for name, (library_name, class_name) in init_dict.items(): + # 3.1 - now that JAX/Flax is an official framework of the library, we might load from Flax names + if class_name.startswith("Flax"): + class_name = class_name[4:] + + is_pipeline_module = hasattr(pipelines, library_name) + loaded_sub_model = None + + # if the model is in a pipeline module, then we load it from the pipeline + if name in passed_class_obj: + # 1. check that passed_class_obj has correct parent class + if not is_pipeline_module: + library = importlib.import_module(library_name) + class_obj = getattr(library, class_name) + importable_classes = LOADABLE_CLASSES[library_name] + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} + + expected_class_obj = None + for class_name, class_candidate in class_candidates.items(): + if class_candidate is not None and issubclass(class_obj, class_candidate): + expected_class_obj = class_candidate + + if not issubclass(passed_class_obj[name].__class__, expected_class_obj): + raise ValueError( + f"{passed_class_obj[name]} is of type: {type(passed_class_obj[name])}, but should be" + f" {expected_class_obj}" + ) + else: + logger.warning( + f"You have passed a non-standard module {passed_class_obj[name]}. We cannot verify whether it" + " has the correct type" + ) + + # set passed class object + loaded_sub_model = passed_class_obj[name] + elif is_pipeline_module: + pipeline_module = getattr(pipelines, library_name) + class_obj = getattr(pipeline_module, class_name) + importable_classes = ALL_IMPORTABLE_CLASSES + class_candidates = {c: class_obj for c in importable_classes.keys()} + else: + # else we just import it from the library. 
+ library = importlib.import_module(library_name) + + class_obj = getattr(library, class_name) + importable_classes = LOADABLE_CLASSES[library_name] + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} + + if loaded_sub_model is None: + load_method_name = None + for class_name, class_candidate in class_candidates.items(): + if class_candidate is not None and issubclass(class_obj, class_candidate): + load_method_name = importable_classes[class_name][1] + + if load_method_name is None: + none_module = class_obj.__module__ + is_dummy_path = none_module.startswith(DUMMY_MODULES_FOLDER) or none_module.startswith( + TRANSFORMERS_DUMMY_MODULES_FOLDER + ) + if is_dummy_path and "dummy" in none_module: + # call class_obj for nice error message of missing requirements + class_obj() + + raise ValueError( + f"The component {class_obj} of {pipeline_class} cannot be loaded as it does not seem to have" + f" any of the loading methods defined in {ALL_IMPORTABLE_CLASSES}." + ) + + load_method = getattr(class_obj, load_method_name) + loading_kwargs = {} + + if issubclass(class_obj, torch.nn.Module): + loading_kwargs["torch_dtype"] = torch_dtype + if issubclass(class_obj, diffusers.OnnxRuntimeModel): + loading_kwargs["provider"] = provider + loading_kwargs["sess_options"] = sess_options + + is_diffusers_model = issubclass(class_obj, diffusers.ModelMixin) + is_transformers_model = ( + is_transformers_available() + and issubclass(class_obj, PreTrainedModel) + and version.parse(version.parse(transformers.__version__).base_version) >= version.parse("4.20.0") + ) + + # When loading a transformers model, if the device_map is None, the weights will be initialized as opposed to diffusers. + # To make default loading faster we set the `low_cpu_mem_usage=low_cpu_mem_usage` flag which is `True` by default. + # This makes sure that the weights won't be initialized which significantly speeds up loading. + if is_diffusers_model or is_transformers_model: + loading_kwargs["device_map"] = device_map + loading_kwargs["variant"] = variant + if from_flax: + loading_kwargs["from_flax"] = True + + # if `from_flax` and model is transformer model, can currently not load with `low_cpu_mem_usage` + if not (from_flax and is_transformers_model): + loading_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + else: + loading_kwargs["low_cpu_mem_usage"] = False + + # check if the module is in a subdirectory + if os.path.isdir(os.path.join(cached_folder, name)): + loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs) + else: + # else load from the root directory + loaded_sub_model = load_method(cached_folder, **loading_kwargs) + + init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...) + + # 4. Potentially add passed objects if expected + missing_modules = set(expected_modules) - set(init_kwargs.keys()) + passed_modules = list(passed_class_obj.keys()) + optional_modules = pipeline_class._optional_components + if len(missing_modules) > 0 and missing_modules <= set(passed_modules + optional_modules): + for module in missing_modules: + init_kwargs[module] = passed_class_obj.get(module, None) + elif len(missing_modules) > 0: + passed_modules = set(list(init_kwargs.keys()) + list(passed_class_obj.keys())) - optional_kwargs + raise ValueError( + f"Pipeline {pipeline_class} expected {expected_modules}, but only {passed_modules} were passed." + ) + + # 5. 
Instantiate the pipeline + model = pipeline_class(**init_kwargs) + + if return_cached_folder: + return model, cached_folder + return model + + @staticmethod + def _get_signature_keys(obj): + parameters = inspect.signature(obj.__init__).parameters + required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} + optional_parameters = set({k for k, v in parameters.items() if v.default != inspect._empty}) + expected_modules = set(required_parameters.keys()) - set(["self"]) + return expected_modules, optional_parameters + + @property + def components(self) -> Dict[str, Any]: + r""" + + The `self.components` property can be useful to run different pipelines with the same weights and + configurations to not have to re-allocate memory. + + Examples: + + ```py + >>> from diffusers import ( + ... StableDiffusionPipeline, + ... StableDiffusionImg2ImgPipeline, + ... StableDiffusionInpaintPipeline, + ... ) + + >>> text2img = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> img2img = StableDiffusionImg2ImgPipeline(**text2img.components) + >>> inpaint = StableDiffusionInpaintPipeline(**text2img.components) + ``` + + Returns: + A dictionary containing all the modules needed to initialize the pipeline. + """ + expected_modules, optional_parameters = self._get_signature_keys(self) + components = { + k: getattr(self, k) for k in self.config.keys() if not k.startswith("_") and k not in optional_parameters + } + + if set(components.keys()) != expected_modules: + raise ValueError( + f"{self} has been incorrectly initialized or {self.__class__} is incorrectly implemented. Expected" + f" {expected_modules} to be defined, but {components} are defined." + ) + + return components + + @staticmethod + def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + if images.shape[-1] == 1: + # special case for grayscale (single channel) images + pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] + else: + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + def progress_bar(self, iterable=None, total=None): + if not hasattr(self, "_progress_bar_config"): + self._progress_bar_config = {} + elif not isinstance(self._progress_bar_config, dict): + raise ValueError( + f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." + ) + + if iterable is not None: + return tqdm(iterable, **self._progress_bar_config) + elif total is not None: + return tqdm(total=total, **self._progress_bar_config) + else: + raise ValueError("Either `total` or `iterable` has to be defined.") + + def set_progress_bar_config(self, **kwargs): + self._progress_bar_config = kwargs + + def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. 
+
+        Parameters:
+            attention_op (`Callable`, *optional*):
+                Override the default `None` operator for use as `op` argument to the
+                [`memory_efficient_attention()`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention)
+                function of xFormers.
+
+        Examples:
+
+        ```py
+        >>> import torch
+        >>> from diffusers import DiffusionPipeline
+        >>> from xformers.ops import MemoryEfficientAttentionFlashAttentionOp
+
+        >>> pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16)
+        >>> pipe = pipe.to("cuda")
+        >>> pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
+        >>> # Workaround for not accepting attention shape using VAE for Flash Attention
+        >>> pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)
+        ```
+        """
+        self.set_use_memory_efficient_attention_xformers(True, attention_op)
+
+    def disable_xformers_memory_efficient_attention(self):
+        r"""
+        Disable memory efficient attention as implemented in xformers.
+        """
+        self.set_use_memory_efficient_attention_xformers(False)
+
+    def set_use_memory_efficient_attention_xformers(
+        self, valid: bool, attention_op: Optional[Callable] = None
+    ) -> None:
+        # Recursively walk through all the children.
+        # Any child which exposes the set_use_memory_efficient_attention_xformers method
+        # gets the message
+        def fn_recursive_set_mem_eff(module: torch.nn.Module):
+            if hasattr(module, "set_use_memory_efficient_attention_xformers"):
+                module.set_use_memory_efficient_attention_xformers(valid, attention_op)
+
+            for child in module.children():
+                fn_recursive_set_mem_eff(child)
+
+        module_names, _, _ = self.extract_init_dict(dict(self.config))
+        for module_name in module_names:
+            module = getattr(self, module_name)
+            if isinstance(module, torch.nn.Module):
+                fn_recursive_set_mem_eff(module)
+
+    def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
+        r"""
+        Enable sliced attention computation.
+
+        When this option is enabled, the attention module will split the input tensor in slices, to compute attention
+        in several steps. This is useful to save some memory in exchange for a small speed decrease.
+
+        Args:
+            slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
+                When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
+                `"max"`, the maximum amount of memory will be saved by running only one slice at a time. If a number is
+                provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
+                must be a multiple of `slice_size`.
+        """
+        self.set_attention_slice(slice_size)
+
+    def disable_attention_slicing(self):
+        r"""
+        Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go
+        back to computing attention in one step.
+ """ + # set slice_size = `None` to disable `attention slicing` + self.enable_attention_slicing(None) + + def set_attention_slice(self, slice_size: Optional[int]): + module_names, _, _ = self.extract_init_dict(dict(self.config)) + for module_name in module_names: + module = getattr(self, module_name) + if isinstance(module, torch.nn.Module) and hasattr(module, "set_attention_slice"): + module.set_attention_slice(slice_size) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index f71d1d769699..41445bb4b35a 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -89,12 +89,12 @@ def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]: return first_tuple[1].dtype -def load_state_dict(checkpoint_file: Union[str, os.PathLike]): +def load_state_dict(checkpoint_file: Union[str, os.PathLike], variant: Optional[str] = None): """ Reads a checkpoint file, returning properly formatted errors if they arise. """ try: - if os.path.basename(checkpoint_file) == WEIGHTS_NAME: + if os.path.basename(checkpoint_file) == _add_variant(WEIGHTS_NAME, variant): return torch.load(checkpoint_file, map_location="cpu") else: return safetensors.torch.load_file(checkpoint_file, device="cpu") @@ -141,6 +141,15 @@ def load(module: torch.nn.Module, prefix=""): return error_msgs +def _add_variant(weights_name: str, variant: Optional[str] = None) -> str: + if variant is not None: + splits = weights_name.split(".") + splits = splits[:-1] + [variant] + splits[-1:] + weights_name = ".".join(splits) + + return weights_name + + class ModelMixin(torch.nn.Module): r""" Base class for all models. @@ -250,6 +259,7 @@ def save_pretrained( is_main_process: bool = True, save_function: Callable = None, safe_serialization: bool = False, + variant: Optional[str] = None, ): """ Save a model and its configuration file to a directory, so that it can be re-loaded using the @@ -268,6 +278,8 @@ def save_pretrained( `DIFFUSERS_SAVE_MODE`. safe_serialization (`bool`, *optional*, defaults to `False`): Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + variant (`str`, *optional*): + If specified, weights are saved in the format pytorch_model..bin. """ if safe_serialization and not is_safetensors_available(): raise ImportError("`safe_serialization` requires the `safetensors library: `pip install safetensors`.") @@ -292,6 +304,7 @@ def save_pretrained( state_dict = model_to_save.state_dict() weights_name = SAFETENSORS_WEIGHTS_NAME if safe_serialization else WEIGHTS_NAME + weights_name = _add_variant(weights_name, variant) # Save the model save_function(state_dict, os.path.join(save_directory, weights_name)) @@ -371,6 +384,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, setting this argument to `True` will raise an error. + variant (`str`, *optional*): + If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is + ignored when using `from_flax`. 
@@ -401,6 +417,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
         subfolder = kwargs.pop("subfolder", None)
         device_map = kwargs.pop("device_map", None)
         low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
+        variant = kwargs.pop("variant", None)
 
         if low_cpu_mem_usage and not is_accelerate_available():
             low_cpu_mem_usage = False
@@ -488,7 +505,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
                 try:
                     model_file = _get_model_file(
                         pretrained_model_name_or_path,
-                        weights_name=SAFETENSORS_WEIGHTS_NAME,
+                        weights_name=_add_variant(SAFETENSORS_WEIGHTS_NAME, variant),
                         cache_dir=cache_dir,
                         force_download=force_download,
                         resume_download=resume_download,
@@ -504,7 +521,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
             if model_file is None:
                 model_file = _get_model_file(
                     pretrained_model_name_or_path,
-                    weights_name=WEIGHTS_NAME,
+                    weights_name=_add_variant(WEIGHTS_NAME, variant),
                     cache_dir=cache_dir,
                     force_download=force_download,
                     resume_download=resume_download,
@@ -538,7 +555,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
             # if device_map is None, load the state dict and move the params from meta device to the cpu
             if device_map is None:
                 param_device = "cpu"
-                state_dict = load_state_dict(model_file)
+                state_dict = load_state_dict(model_file, variant=variant)
                 # move the params from meta device to cpu
                 missing_keys = set(model.state_dict().keys()) - set(state_dict.keys())
                 if len(missing_keys) > 0:
@@ -587,7 +604,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
             )
 
             model = cls.from_config(config, **unused_kwargs)
-            state_dict = load_state_dict(model_file)
+            state_dict = load_state_dict(model_file, variant=variant)
 
             model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model(
                 model,
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index b6cf92abfcdf..efd6658f6419 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -32,7 +32,7 @@
 import diffusers
 
 from ..configuration_utils import ConfigMixin
-from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, _add_variant
 from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
 from ..utils import (
     CONFIG_NAME,
@@ -41,6 +41,7 @@
     HF_HUB_OFFLINE,
     ONNX_WEIGHTS_NAME,
     WEIGHTS_NAME,
+    SAFETENSORS_WEIGHTS_NAME,
     BaseOutput,
     deprecate,
     get_class_from_dynamic_module,
@@ -120,15 +121,16 @@ class AudioPipelineOutput(BaseOutput):
     audios: np.ndarray
 
 
-def is_safetensors_compatible(info) -> bool:
-    filenames = set(sibling.rfilename for sibling in info.siblings)
+def is_safetensors_compatible(filenames, variant=None) -> bool:
     pt_filenames = set(filename for filename in filenames if filename.endswith(".bin"))
     is_safetensors_compatible = any(file.endswith(".safetensors") for file in filenames)
+    variant = f".{variant}" if variant is not None else ""
+
     for pt_filename in pt_filenames:
         prefix, raw = os.path.split(pt_filename)
-        if raw == "pytorch_model.bin":
+        if raw == f"pytorch_model{variant}.bin":
             # transformers specific
-            sf_filename = os.path.join(prefix, "model.safetensors")
+            sf_filename = os.path.join(prefix, f"model{variant}.safetensors")
         else:
             sf_filename = pt_filename[: -len(".bin")] + ".safetensors"
         if is_safetensors_compatible and sf_filename not in filenames:
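The next hunk adds `variant_compatible_siblings`, which decides which repo files to fetch for a requested variant: variant-suffixed weights win, and a non-variant file is only kept when no variant counterpart exists. A standalone sketch of that resolution logic follows; the helper name and the filename set are made up for illustration.

```py
def resolve_variant_filenames(filenames, variant=None):
    # mirrors the logic of variant_compatible_siblings, but takes a plain
    # set of filenames instead of a huggingface_hub ModelInfo object
    variant = variant or ""
    save_formats = ["bin", "safetensors", "msgpack", "onnx"]

    variant_filenames = {f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats)}
    non_variant_filenames = {
        f for f in filenames if len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats)
    }

    usable = set(variant_filenames)
    for f in non_variant_filenames:
        stem, ext = f.split(".")
        if f"{stem}.{variant}.{ext}" not in usable:
            usable.add(f)  # no variant counterpart exists: fall back to the non-variant file
    return usable


files = {
    "unet/diffusion_pytorch_model.bin",
    "unet/diffusion_pytorch_model.fp16.bin",
    "vae/diffusion_pytorch_model.bin",
}
# the unet has an fp16 variant, so only the vae falls back to its non-variant weights
assert resolve_variant_filenames(files, "fp16") == {
    "unet/diffusion_pytorch_model.fp16.bin",
    "vae/diffusion_pytorch_model.bin",
}
```

Note the fallback is per-file: a repo that ships only some components in a given variant still resolves to a complete, loadable set.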
@@ -137,6 +139,24 @@ def is_safetensors_compatible(info) -> bool:
     return is_safetensors_compatible
 
 
+def variant_compatible_siblings(info, variant=None) -> set:
+    variant = variant or ""
+
+    filenames = set(sibling.rfilename for sibling in info.siblings)
+    save_formats = ["bin", "safetensors", "msgpack", "onnx"]
+
+    variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats))
+    non_variant_filenames = set(
+        f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats))
+    )
+
+    usable_filenames = set(variant_filenames)
+    for f in non_variant_filenames:
+        variant_filename = f"{f.split('.')[0]}.{variant}.{f.split('.')[1]}"
+        if variant_filename not in usable_filenames:
+            usable_filenames.add(f)
+
+    return usable_filenames
+
+
 class DiffusionPipeline(ConfigMixin):
     r"""
     Base class for all models.
@@ -194,6 +214,7 @@ def save_pretrained(
         self,
         save_directory: Union[str, os.PathLike],
         safe_serialization: bool = False,
+        variant: Optional[str] = None,
     ):
         """
         Save all variables of the pipeline that can be saved and loaded as well as the pipelines configuration file to
@@ -205,6 +226,8 @@ def save_pretrained(
                 Directory to which to save. Will be created if it doesn't exist.
             safe_serialization (`bool`, *optional*, defaults to `False`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
+            variant (`str`, *optional*):
+                If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
         """
         self.save_config(save_directory)
@@ -246,12 +269,15 @@ def is_saveable_module(name, value):
         # Call the save method with the argument safe_serialization only if it's supported
         save_method_signature = inspect.signature(save_method)
         save_method_accept_safe = "safe_serialization" in save_method_signature.parameters
+        save_method_accept_variant = "variant" in save_method_signature.parameters
+
+        save_kwargs = {}
         if save_method_accept_safe:
-            save_method(
-                os.path.join(save_directory, pipeline_component_name), safe_serialization=safe_serialization
-            )
-        else:
-            save_method(os.path.join(save_directory, pipeline_component_name))
+            save_kwargs["safe_serialization"] = safe_serialization
+        if save_method_accept_variant:
+            save_kwargs["variant"] = variant
+
+        save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs)
 
     def to(self, torch_device: Optional[Union[str, torch.device]] = None):
         if torch_device is None:
@@ -403,6 +429,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
                 Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the
                 specific pipeline class. The overwritten components are then directly passed to the pipelines
                 `__init__` method. See example below for more information.
+            variant (`str`, *optional*):
+                If specified, load weights from a `variant` filename, *e.g.* `pytorch_model.<variant>.bin`. `variant`
+                is ignored when using `from_flax`.
@@ -454,6 +483,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
         device_map = kwargs.pop("device_map", None)
         low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         return_cached_folder = kwargs.pop("return_cached_folder", False)
+        variant = kwargs.pop("variant", None)
 
         # 1.
Download the checkpoints and configs
         # use snapshot download here to get it working from from_pretrained
@@ -468,28 +498,57 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
                 use_auth_token=use_auth_token,
                 revision=revision,
             )
-            # make sure we only download sub-folders and `diffusers` filenames
+
+            # retrieve all folder_names that contain relevant files
             folder_names = [k for k in config_dict.keys() if not k.startswith("_")]
-            allow_patterns = [os.path.join(k, "*") for k in folder_names]
-            allow_patterns += [
-                WEIGHTS_NAME,
-                SCHEDULER_CONFIG_NAME,
-                CONFIG_NAME,
-                ONNX_WEIGHTS_NAME,
-                cls.config_name,
-            ]
-
-            # make sure we don't download flax weights
-            ignore_patterns = ["*.msgpack"]
-
-            if from_flax:
-                ignore_patterns = ["*.bin", "*.safetensors"]
+
+            if not local_files_only:
+                info = model_info(
+                    pretrained_model_name_or_path,
+                    use_auth_token=use_auth_token,
+                    revision=revision,
+                )
+                model_filenames = variant_compatible_siblings(info, variant=variant)
+                model_folder_names = set([os.path.split(f)[0] for f in model_filenames])
+
+                # allow all patterns from non-model folders
+                # this enables downloading schedulers, tokenizers, ...
+                allow_patterns = [os.path.join(k, "*") for k in folder_names if k not in model_folder_names]
+                # also allow downloading config.jsons with the model
+                allow_patterns += [os.path.join(k, "*.json") for k in model_folder_names]
+
+                allow_patterns += [
+                    SCHEDULER_CONFIG_NAME,
+                    CONFIG_NAME,
+                    cls.config_name,
+                    CUSTOM_PIPELINE_FILE_NAME,
+                ]
+
+                if from_flax:
+                    ignore_patterns = ["*.bin", "*.safetensors"]
+                elif is_safetensors_available() and is_safetensors_compatible(model_filenames):
+                    ignore_patterns = ["*.bin", "*.msgpack"]
+                else:
+                    ignore_patterns = ["*.safetensors", "*.msgpack"]
+
+            else:
+                # allow everything since it has to be downloaded anyways
+                allow_patterns = [os.path.join(k, "*") for k in folder_names]
+                allow_patterns += [
+                    WEIGHTS_NAME,
+                    _add_variant(WEIGHTS_NAME, variant),
+                    SAFETENSORS_WEIGHTS_NAME,
+                    _add_variant(SAFETENSORS_WEIGHTS_NAME, variant),
+                    SCHEDULER_CONFIG_NAME,
+                    CONFIG_NAME,
+                    ONNX_WEIGHTS_NAME,
+                    FLAX_WEIGHTS_NAME,
+                    cls.config_name,
+                    CUSTOM_PIPELINE_FILE_NAME,
+                ]
-            if custom_pipeline is not None:
-                allow_patterns += [CUSTOM_PIPELINE_FILE_NAME]
+
+                # make sure we don't download incompatible weight formats
+                if from_flax:
+                    ignore_patterns = ["*.bin", "*.safetensors"]
+                else:
+                    ignore_patterns = ["*.msgpack"]
 
         if cls != DiffusionPipeline:
             requested_pipeline_class = cls.__name__
@@ -501,21 +560,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
 
         user_agent = http_user_agent(user_agent)
 
-        if is_safetensors_available() and not local_files_only:
-            info = model_info(
-                pretrained_model_name_or_path,
-                use_auth_token=use_auth_token,
-                revision=revision,
-            )
-            if is_safetensors_compatible(info):
-                ignore_patterns.append("*.bin")
-            else:
-                # as a safety mechanism we also don't download safetensors if
-                # not all safetensors files are there
-                ignore_patterns.append("*.safetensors")
-        else:
-            ignore_patterns.append("*.safetensors")
-
         # download all allow_patterns
         cached_folder = snapshot_download(
             pretrained_model_name_or_path,
@@ -728,6 +772,7 @@ def load_module(name, value):
         # This makes sure that the weights won't be initialized which significantly speeds up loading.
if is_diffusers_model or is_transformers_model: loading_kwargs["device_map"] = device_map + loading_kwargs["variant"] = variant if from_flax: loading_kwargs["from_flax"] = True From 91ee04ec33fccefd42992bb167b8f96c6f7cf5c2 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 9 Feb 2023 15:59:14 +0200 Subject: [PATCH 02/27] clean --- + | 1015 ------------------------------------------------------------- 1 file changed, 1015 deletions(-) delete mode 100644 + diff --git a/+ b/+ deleted file mode 100644 index a8b18a60f250..000000000000 --- a/+ +++ /dev/null @@ -1,1015 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import importlib -import inspect -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union - -import numpy as np -import PIL -import torch -from huggingface_hub import model_info, snapshot_download -from packaging import version -from PIL import Image -from tqdm.auto import tqdm - -import diffusers - -from ..configuration_utils import ConfigMixin -from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, _add_variant -from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME -from ..utils import ( - CONFIG_NAME, - DIFFUSERS_CACHE, - FLAX_WEIGHTS_NAME, - HF_HUB_OFFLINE, - ONNX_WEIGHTS_NAME, - WEIGHTS_NAME, - SAFETENSORS_WEIGHTS_NAME, - BaseOutput, - deprecate, - get_class_from_dynamic_module, - http_user_agent, - is_accelerate_available, - is_safetensors_available, - is_torch_version, - is_transformers_available, - logging, -) - - -if is_transformers_available(): - import transformers - from transformers import PreTrainedModel - - -INDEX_FILE = "diffusion_pytorch_model.bin" -CUSTOM_PIPELINE_FILE_NAME = "pipeline.py" -DUMMY_MODULES_FOLDER = "diffusers.utils" -TRANSFORMERS_DUMMY_MODULES_FOLDER = "transformers.utils" - - -logger = logging.get_logger(__name__) - - -LOADABLE_CLASSES = { - "diffusers": { - "ModelMixin": ["save_pretrained", "from_pretrained"], - "SchedulerMixin": ["save_pretrained", "from_pretrained"], - "DiffusionPipeline": ["save_pretrained", "from_pretrained"], - "OnnxRuntimeModel": ["save_pretrained", "from_pretrained"], - }, - "transformers": { - "PreTrainedTokenizer": ["save_pretrained", "from_pretrained"], - "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"], - "PreTrainedModel": ["save_pretrained", "from_pretrained"], - "FeatureExtractionMixin": ["save_pretrained", "from_pretrained"], - "ProcessorMixin": ["save_pretrained", "from_pretrained"], - "ImageProcessingMixin": ["save_pretrained", "from_pretrained"], - }, - "onnxruntime.training": { - "ORTModule": ["save_pretrained", "from_pretrained"], - }, -} - -ALL_IMPORTABLE_CLASSES = {} -for library in LOADABLE_CLASSES: - ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES[library]) - - -@dataclass -class ImagePipelineOutput(BaseOutput): - """ - Output class for image pipelines. 
-
-    Args:
-        images (`List[PIL.Image.Image]` or `np.ndarray`):
-            List of denoised PIL images of length `batch_size` or a numpy array of shape `(batch_size, height, width,
-            num_channels)`. PIL images or the numpy array represent the denoised images of the diffusion pipeline.
-    """
-
-    images: Union[List[PIL.Image.Image], np.ndarray]
-
-
-@dataclass
-class AudioPipelineOutput(BaseOutput):
-    """
-    Output class for audio pipelines.
-
-    Args:
-        audios (`np.ndarray`):
-            List of denoised samples of shape `(batch_size, num_channels, sample_rate)`. The numpy array represents the
-            denoised audio samples of the diffusion pipeline.
-    """
-
-    audios: np.ndarray
-
-
-def is_safetensors_compatible(filenames, variant=None) -> bool:
-    pt_filenames = set(filename for filename in filenames if filename.endswith(".bin"))
-    is_safetensors_compatible = any(file.endswith(".safetensors") for file in filenames)
-    variant = f".{variant}" if variant is not None else ""
-
-    for pt_filename in pt_filenames:
-        prefix, raw = os.path.split(pt_filename)
-        if raw == f"pytorch_model{variant}.bin":
-            # transformers specific
-            sf_filename = os.path.join(prefix, f"model{variant}.safetensors")
-        else:
-            sf_filename = pt_filename[: -len(".bin")] + ".safetensors"
-        if is_safetensors_compatible and sf_filename not in filenames:
-            logger.warning(f"{sf_filename} not found")
-            is_safetensors_compatible = False
-    return is_safetensors_compatible
-
-
-def variant_compatible_siblings(info, variant=None) -> set:
-    variant = variant or ""
-
-    filenames = set(sibling.rfilename for sibling in info.siblings)
-    save_formats = ["bin", "safetensors", "msgpack", "onnx"]
-
-    variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats))
-    non_variant_filenames = set(
-        f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats))
-    )
-
-    usable_filenames = set(variant_filenames)
-    for f in non_variant_filenames:
-        variant_filename = f"{f.split('.')[0]}.{variant}.{f.split('.')[1]}"
-        if variant_filename not in usable_filenames:
-            usable_filenames.add(f)
-
-    return usable_filenames
-
-
-class DiffusionPipeline(ConfigMixin):
-    r"""
-    Base class for all pipelines.
-
-    [`DiffusionPipeline`] takes care of storing all components (models, schedulers, processors) for diffusion pipelines
-    and handles methods for loading, downloading and saving models as well as a few methods common to all pipelines to:
-
-        - move all PyTorch modules to the device of your choice
-        - enable/disable the progress bar for the denoising iteration
-
-    Class attributes:
-
-        - **config_name** (`str`) -- name of the config file that will store the class and module names of all
-          components of the diffusion pipeline.
-        - **_optional_components** (List[`str`]) -- list of all components that are optional so they don't have to be
-          passed for the pipeline to function (should be overridden by subclasses).
- """ - config_name = "model_index.json" - _optional_components = [] - - def register_modules(self, **kwargs): - # import it here to avoid circular import - from diffusers import pipelines - - for name, module in kwargs.items(): - # retrieve library - if module is None: - register_dict = {name: (None, None)} - else: - library = module.__module__.split(".")[0] - - # check if the module is a pipeline module - pipeline_dir = module.__module__.split(".")[-2] if len(module.__module__.split(".")) > 2 else None - path = module.__module__.split(".") - is_pipeline_module = pipeline_dir in path and hasattr(pipelines, pipeline_dir) - - # if library is not in LOADABLE_CLASSES, then it is a custom module. - # Or if it's a pipeline module, then the module is inside the pipeline - # folder so we set the library to module name. - if library not in LOADABLE_CLASSES or is_pipeline_module: - library = pipeline_dir - - # retrieve class_name - class_name = module.__class__.__name__ - - register_dict = {name: (library, class_name)} - - # save model index config - self.register_to_config(**register_dict) - - # set models - setattr(self, name, module) - - def save_pretrained( - self, - save_directory: Union[str, os.PathLike], - safe_serialization: bool = False, - variant: Optional[str] = None, - ): - """ - Save all variables of the pipeline that can be saved and loaded as well as the pipelines configuration file to - a directory. A pipeline variable can be saved and loaded if its class implements both a save and loading - method. The pipeline can easily be re-loaded using the `[`~DiffusionPipeline.from_pretrained`]` class method. - - Arguments: - save_directory (`str` or `os.PathLike`): - Directory to which to save. Will be created if it doesn't exist. - safe_serialization (`bool`, *optional*, defaults to `False`): - Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). - variant (`str`, *optional*): - If specified, weights are saved in the format pytorch_model..bin. 
- """ - self.save_config(save_directory) - - model_index_dict = dict(self.config) - model_index_dict.pop("_class_name") - model_index_dict.pop("_diffusers_version") - model_index_dict.pop("_module", None) - - expected_modules, optional_kwargs = self._get_signature_keys(self) - - def is_saveable_module(name, value): - if name not in expected_modules: - return False - if name in self._optional_components and value[0] is None: - return False - return True - - model_index_dict = {k: v for k, v in model_index_dict.items() if is_saveable_module(k, v)} - - for pipeline_component_name in model_index_dict.keys(): - sub_model = getattr(self, pipeline_component_name) - model_cls = sub_model.__class__ - - save_method_name = None - # search for the model's base class in LOADABLE_CLASSES - for library_name, library_classes in LOADABLE_CLASSES.items(): - library = importlib.import_module(library_name) - for base_class, save_load_methods in library_classes.items(): - class_candidate = getattr(library, base_class, None) - if class_candidate is not None and issubclass(model_cls, class_candidate): - # if we found a suitable base class in LOADABLE_CLASSES then grab its save method - save_method_name = save_load_methods[0] - break - if save_method_name is not None: - break - - save_method = getattr(sub_model, save_method_name) - - # Call the save method with the argument safe_serialization only if it's supported - save_method_signature = inspect.signature(save_method) - save_method_accept_safe = "safe_serialization" in save_method_signature.parameters - save_method_accept_variant = "variant" in save_method_signature.parameters - - save_kwargs = {} - if save_method_accept_safe: - save_kwargs["safe_serialization"] = safe_serialization - if save_method_accept_variant: - save_kwargs["variant"] = variant - - save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs) - - def to(self, torch_device: Optional[Union[str, torch.device]] = None): - if torch_device is None: - return self - - module_names, _, _ = self.extract_init_dict(dict(self.config)) - for name in module_names.keys(): - module = getattr(self, name) - if isinstance(module, torch.nn.Module): - if module.dtype == torch.float16 and str(torch_device) in ["cpu"]: - logger.warning( - "Pipelines loaded with `torch_dtype=torch.float16` cannot run with `cpu` device. It" - " is not recommended to move them to `cpu` as running them will fail. Please make" - " sure to use an accelerator to run the pipeline in inference, due to the lack of" - " support for`float16` operations on this device in PyTorch. Please, remove the" - " `torch_dtype=torch.float16` argument, or use another device for inference." - ) - module.to(torch_device) - return self - - @property - def device(self) -> torch.device: - r""" - Returns: - `torch.device`: The torch device on which the pipeline is located. - """ - module_names, _, _ = self.extract_init_dict(dict(self.config)) - for name in module_names.keys(): - module = getattr(self, name) - if isinstance(module, torch.nn.Module): - return module.device - return torch.device("cpu") - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs): - r""" - Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights. - - The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). 
- - The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come - pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning - task. - - The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those - weights are discarded. - - Parameters: - pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): - Can be either: - - - A string, the *repo id* of a pretrained pipeline hosted inside a model repo on - https://huggingface.co/ Valid repo ids have to be located under a user or organization name, like - `CompVis/ldm-text2im-large-256`. - - A path to a *directory* containing pipeline weights saved using - [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`. - torch_dtype (`str` or `torch.dtype`, *optional*): - Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the dtype - will be automatically derived from the model's weights. - custom_pipeline (`str`, *optional*): - - - - This is an experimental feature and is likely to change in the future. - - - - Can be either: - - - A string, the *repo id* of a custom pipeline hosted inside a model repo on - https://huggingface.co/. Valid repo ids have to be located under a user or organization name, - like `hf-internal-testing/diffusers-dummy-pipeline`. - - - - It is required that the model repo has a file, called `pipeline.py` that defines the custom - pipeline. - - - - - A string, the *file name* of a community pipeline hosted on GitHub under - https://github.com/huggingface/diffusers/tree/main/examples/community. Valid file names have to - match exactly the file name without `.py` located under the above link, *e.g.* - `clip_guided_stable_diffusion`. - - - - Community pipelines are always loaded from the current `main` branch of GitHub. - - - - - A path to a *directory* containing a custom pipeline, e.g., `./my_pipeline_directory/`. - - - - It is required that the directory has a file, called `pipeline.py` that defines the custom - pipeline. - - - - For more information on how to load and create custom pipelines, please have a look at [Loading and - Adding Custom - Pipelines](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview) - - force_download (`bool`, *optional*, defaults to `False`): - Whether or not to force the (re-)download of the model weights and configuration files, overriding the - cached versions if they exist. - resume_download (`bool`, *optional*, defaults to `False`): - Whether or not to delete incompletely received files. Will attempt to resume the download if such a - file exists. - proxies (`Dict[str, str]`, *optional*): - A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', - 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. - output_loading_info(`bool`, *optional*, defaults to `False`): - Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. - local_files_only(`bool`, *optional*, defaults to `False`): - Whether or not to only look at local files (i.e., do not try to download the model). - use_auth_token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `huggingface-cli login` (stored in `~/.huggingface`). 
- revision (`str`, *optional*, defaults to `"main"`): - The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a - git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any - identifier allowed by git. - custom_revision (`str`, *optional*, defaults to `"main"` when loading from the Hub and to local version of `diffusers` when loading from GitHub): - The specific model version to use. It can be a branch name, a tag name, or a commit id similar to - `revision` when loading a custom pipeline from the Hub. It can be a diffusers version when loading a - custom pipeline from GitHub. - mirror (`str`, *optional*): - Mirror source to accelerate downloads in China. If you are from China and have an accessibility - problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. - Please refer to the mirror site for more information. specify the folder name here. - device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*): - A map that specifies where each submodule should go. It doesn't need to be refined to each - parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the - same device. - - To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For - more information about each option see [designing a device - map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). - low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): - Speed up model loading by not initializing the weights and only loading the pre-trained weights. This - also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the - model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, - setting this argument to `True` will raise an error. - return_cached_folder (`bool`, *optional*, defaults to `False`): - If set to `True`, path to downloaded cached folder will be returned in addition to loaded pipeline. - kwargs (remaining dictionary of keyword arguments, *optional*): - Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the - specific pipeline class. The overwritten components are then directly passed to the pipelines - `__init__` method. See example below for more information. - variant (`str`, *optional*): - If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is - ignored when using `from_flax`. - - - - It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated - models](https://huggingface.co/docs/hub/models-gated#gated-models), *e.g.* `"runwayml/stable-diffusion-v1-5"` - - - - - - Activate the special ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use - this method in a firewalled environment. - - - - Examples: - - ```py - >>> from diffusers import DiffusionPipeline - - >>> # Download pipeline from huggingface.co and cache. 
- >>> pipeline = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") - - >>> # Download pipeline that requires an authorization token - >>> # For more information on access tokens, please refer to this section - >>> # of the documentation](https://huggingface.co/docs/hub/security-tokens) - >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") - - >>> # Use a different scheduler - >>> from diffusers import LMSDiscreteScheduler - - >>> scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config) - >>> pipeline.scheduler = scheduler - ``` - """ - cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) - resume_download = kwargs.pop("resume_download", False) - force_download = kwargs.pop("force_download", False) - proxies = kwargs.pop("proxies", None) - local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) - use_auth_token = kwargs.pop("use_auth_token", None) - revision = kwargs.pop("revision", None) - from_flax = kwargs.pop("from_flax", False) - torch_dtype = kwargs.pop("torch_dtype", None) - custom_pipeline = kwargs.pop("custom_pipeline", None) - custom_revision = kwargs.pop("custom_revision", None) - provider = kwargs.pop("provider", None) - sess_options = kwargs.pop("sess_options", None) - device_map = kwargs.pop("device_map", None) - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) - return_cached_folder = kwargs.pop("return_cached_folder", False) - variant = kwargs.pop("variant", None) - - # 1. Download the checkpoints and configs - # use snapshot download here to get it working from from_pretrained - if not os.path.isdir(pretrained_model_name_or_path): - config_dict = cls.load_config( - pretrained_model_name_or_path, - cache_dir=cache_dir, - resume_download=resume_download, - force_download=force_download, - proxies=proxies, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - revision=revision, - ) - - # retrieve all folder_names that contain relevant files - folder_names = [k for k in config_dict.keys() if not k.startswith("_")] - - if not local_files_only: - info = model_info( - pretrained_model_name_or_path, - use_auth_token=use_auth_token, - revision=revision, - ) - loadable_filenames = variant_compatible_siblings(info, variant=variant) - - if from_flax: - ignore_patterns = ["*.bin", "*.safetensors"] - - if is_safetensors_available() and is_safetensors_compatible(loadable_filenames): - ignore_patterns = ["*.bin", "*.msgpack"] - - allow_patterns = create_allow_patterns(from_flax, variant, info, allow_patterns) - ignore_patterns = create_ignore_patterns(from_flax, variant, info, allow_patterns) - else: - # allow everything since it has to be downloaded anyways - # create allow patterns - # first we allow everything - # i. add all files from subfolders - allow_patterns = [os.path.join(k, "*") for k in folder_names] - - # ii. 
add all files that can be loaded from root - allow_patterns += [ - WEIGHTS_NAME, - _add_variant(WEIGHTS_NAME), - SAFETENSORS_WEIGHTS_NAME, - _add_variant(SAFETENSORS_WEIGHTS_NAME), - SCHEDULER_CONFIG_NAME, - CONFIG_NAME, - ONNX_WEIGHTS_NAME, - FLAX_WEIGHTS_NAME, - cls.config_name, - CUSTOM_PIPELINE_FILE_NAME, - ] - - def create_allow_patterns(from_flax, variant, info, allow_patterns): - # from flax - if from_flax: - for pattern in [WEIGHTS_NAME, ONNX_WEIGHTS_NAME, _add_variant(WEIGHTS_NAME)]: - allow_patterns.remove(pattern) - else: - allow_patterns.remove(FLAX_WEIGHTS_NAME) - - allow_pa - - # variant - - - # safetensors -# if is_safetensors_available() and is_safetensors_compatible(info): - # if safetensors is available we can assume that info is defined -# allow_patterns.remove(WEIGHTS_NAME) -# else: -# allow_patterns.remove(SAFETENSORS_WEIGHTS_NAME) - - - if from_flax: - allow_patterns += [ - FLAX_WEIGHTS_NAME, - ] - allow_patterns.remove(WEIGHTS_NAME) - allow_patterns.remove(ONNX_WEIGHTS_NAME) - allow_patterns.remove(_add_variant(WEIGHTS_NAME)) - - # make sure we don't download flax weights - - if is_safetensors_available() and not local_files_only: - if is_safetensors_compatible(info): - ignore_patterns.append("*.bin") - else: - # as a safety mechanism we also don't download safetensors if - # not all safetensors files are there - ignore_patterns.append("*.safetensors") - else: - ignore_patterns.append("*.safetensors") - - if from_flax: - ignore_patterns = ["*.bin", "*.safetensors"] - else: - ignore_patterns = ["*.msgpack"] - - - if cls != DiffusionPipeline: - requested_pipeline_class = cls.__name__ - else: - requested_pipeline_class = config_dict.get("_class_name", cls.__name__) - user_agent = {"pipeline_class": requested_pipeline_class} - if custom_pipeline is not None and not custom_pipeline.endswith(".py"): - user_agent["custom_pipeline"] = custom_pipeline - - user_agent = http_user_agent(user_agent) - - # download all allow_patterns - cached_folder = snapshot_download( - pretrained_model_name_or_path, - cache_dir=cache_dir, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - revision=revision, - allow_patterns=allow_patterns, - ignore_patterns=ignore_patterns, - user_agent=user_agent, - ) - else: - cached_folder = pretrained_model_name_or_path - config_dict = cls.load_config(cached_folder) - - # 2. 
Load the pipeline class, if using custom module then load it from the hub - # if we load from explicit class, let's use it - if custom_pipeline is not None: - if custom_pipeline.endswith(".py"): - path = Path(custom_pipeline) - # decompose into folder & file - file_name = path.name - custom_pipeline = path.parent.absolute() - else: - file_name = CUSTOM_PIPELINE_FILE_NAME - - pipeline_class = get_class_from_dynamic_module( - custom_pipeline, module_file=file_name, cache_dir=cache_dir, revision=custom_revision - ) - elif cls != DiffusionPipeline: - pipeline_class = cls - else: - diffusers_module = importlib.import_module(cls.__module__.split(".")[0]) - pipeline_class = getattr(diffusers_module, config_dict["_class_name"]) - - # To be removed in 1.0.0 - if pipeline_class.__name__ == "StableDiffusionInpaintPipeline" and version.parse( - version.parse(config_dict["_diffusers_version"]).base_version - ) <= version.parse("0.5.1"): - from diffusers import StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy - - pipeline_class = StableDiffusionInpaintPipelineLegacy - - deprecation_message = ( - "You are using a legacy checkpoint for inpainting with Stable Diffusion, therefore we are loading the" - f" {StableDiffusionInpaintPipelineLegacy} class instead of {StableDiffusionInpaintPipeline}. For" - " better inpainting results, we strongly suggest using Stable Diffusion's official inpainting" - " checkpoint: https://huggingface.co/runwayml/stable-diffusion-inpainting instead or adapting your" - f" checkpoint {pretrained_model_name_or_path} to the format of" - " https://huggingface.co/runwayml/stable-diffusion-inpainting. Note that we do not actively maintain" - " the {StableDiffusionInpaintPipelineLegacy} class and will likely remove it in version 1.0.0." - ) - deprecate("StableDiffusionInpaintPipelineLegacy", "1.0.0", deprecation_message, standard_warn=False) - - # some modules can be passed directly to the init - # in this case they are already instantiated in `kwargs` - # extract them here - expected_modules, optional_kwargs = cls._get_signature_keys(pipeline_class) - passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} - passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} - - init_dict, unused_kwargs, _ = pipeline_class.extract_init_dict(config_dict, **kwargs) - - # define init kwargs - init_kwargs = {k: init_dict.pop(k) for k in optional_kwargs if k in init_dict} - init_kwargs = {**init_kwargs, **passed_pipe_kwargs} - - # remove `null` components - def load_module(name, value): - if value[0] is None: - return False - if name in passed_class_obj and passed_class_obj[name] is None: - return False - return True - - init_dict = {k: v for k, v in init_dict.items() if load_module(k, v)} - - # Special case: safety_checker must be loaded separately when using `from_flax` - if from_flax and "safety_checker" in init_dict and "safety_checker" not in passed_class_obj: - raise NotImplementedError( - "The safety checker cannot be automatically loaded when loading weights `from_flax`." - " Please, pass `safety_checker=None` to `from_pretrained`, and load the safety checker" - " separately if you need it." - ) - - if len(unused_kwargs) > 0: - logger.warning( - f"Keyword arguments {unused_kwargs} are not expected by {pipeline_class.__name__} and will be ignored." 
- ) - - if low_cpu_mem_usage and not is_accelerate_available(): - low_cpu_mem_usage = False - logger.warning( - "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" - " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" - " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" - " install accelerate\n```\n." - ) - - if device_map is not None and not is_torch_version(">=", "1.9.0"): - raise NotImplementedError( - "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" - " `device_map=None`." - ) - - if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): - raise NotImplementedError( - "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" - " `low_cpu_mem_usage=False`." - ) - - if low_cpu_mem_usage is False and device_map is not None: - raise ValueError( - f"You cannot set `low_cpu_mem_usage` to False while using device_map={device_map} for loading and" - " dispatching. Please make sure to set `low_cpu_mem_usage=True`." - ) - - # import it here to avoid circular import - from diffusers import pipelines - - # 3. Load each module in the pipeline - for name, (library_name, class_name) in init_dict.items(): - # 3.1 - now that JAX/Flax is an official framework of the library, we might load from Flax names - if class_name.startswith("Flax"): - class_name = class_name[4:] - - is_pipeline_module = hasattr(pipelines, library_name) - loaded_sub_model = None - - # if the model is in a pipeline module, then we load it from the pipeline - if name in passed_class_obj: - # 1. check that passed_class_obj has correct parent class - if not is_pipeline_module: - library = importlib.import_module(library_name) - class_obj = getattr(library, class_name) - importable_classes = LOADABLE_CLASSES[library_name] - class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} - - expected_class_obj = None - for class_name, class_candidate in class_candidates.items(): - if class_candidate is not None and issubclass(class_obj, class_candidate): - expected_class_obj = class_candidate - - if not issubclass(passed_class_obj[name].__class__, expected_class_obj): - raise ValueError( - f"{passed_class_obj[name]} is of type: {type(passed_class_obj[name])}, but should be" - f" {expected_class_obj}" - ) - else: - logger.warning( - f"You have passed a non-standard module {passed_class_obj[name]}. We cannot verify whether it" - " has the correct type" - ) - - # set passed class object - loaded_sub_model = passed_class_obj[name] - elif is_pipeline_module: - pipeline_module = getattr(pipelines, library_name) - class_obj = getattr(pipeline_module, class_name) - importable_classes = ALL_IMPORTABLE_CLASSES - class_candidates = {c: class_obj for c in importable_classes.keys()} - else: - # else we just import it from the library. 
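Stripped of its validation, the component resolution in this loop is `importlib` applied to the `(library, class_name)` pairs recorded in `model_index.json`. A minimal sketch:

```py
import importlib

def resolve_class(library_name: str, class_name: str):
    """Sketch: turn a ("diffusers", "UNet2DConditionModel") pair into a class."""
    library = importlib.import_module(library_name)
    return getattr(library, class_name)

# e.g. a ("diffusers", "UNet2DConditionModel") entry from model_index.json
cls = resolve_class("diffusers", "UNet2DConditionModel")
print(cls.__name__)  # UNet2DConditionModel
```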
- library = importlib.import_module(library_name) - - class_obj = getattr(library, class_name) - importable_classes = LOADABLE_CLASSES[library_name] - class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} - - if loaded_sub_model is None: - load_method_name = None - for class_name, class_candidate in class_candidates.items(): - if class_candidate is not None and issubclass(class_obj, class_candidate): - load_method_name = importable_classes[class_name][1] - - if load_method_name is None: - none_module = class_obj.__module__ - is_dummy_path = none_module.startswith(DUMMY_MODULES_FOLDER) or none_module.startswith( - TRANSFORMERS_DUMMY_MODULES_FOLDER - ) - if is_dummy_path and "dummy" in none_module: - # call class_obj for nice error message of missing requirements - class_obj() - - raise ValueError( - f"The component {class_obj} of {pipeline_class} cannot be loaded as it does not seem to have" - f" any of the loading methods defined in {ALL_IMPORTABLE_CLASSES}." - ) - - load_method = getattr(class_obj, load_method_name) - loading_kwargs = {} - - if issubclass(class_obj, torch.nn.Module): - loading_kwargs["torch_dtype"] = torch_dtype - if issubclass(class_obj, diffusers.OnnxRuntimeModel): - loading_kwargs["provider"] = provider - loading_kwargs["sess_options"] = sess_options - - is_diffusers_model = issubclass(class_obj, diffusers.ModelMixin) - is_transformers_model = ( - is_transformers_available() - and issubclass(class_obj, PreTrainedModel) - and version.parse(version.parse(transformers.__version__).base_version) >= version.parse("4.20.0") - ) - - # When loading a transformers model, if the device_map is None, the weights will be initialized as opposed to diffusers. - # To make default loading faster we set the `low_cpu_mem_usage=low_cpu_mem_usage` flag which is `True` by default. - # This makes sure that the weights won't be initialized which significantly speeds up loading. - if is_diffusers_model or is_transformers_model: - loading_kwargs["device_map"] = device_map - loading_kwargs["variant"] = variant - if from_flax: - loading_kwargs["from_flax"] = True - - # if `from_flax` and model is transformer model, can currently not load with `low_cpu_mem_usage` - if not (from_flax and is_transformers_model): - loading_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage - else: - loading_kwargs["low_cpu_mem_usage"] = False - - # check if the module is in a subdirectory - if os.path.isdir(os.path.join(cached_folder, name)): - loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs) - else: - # else load from the root directory - loaded_sub_model = load_method(cached_folder, **loading_kwargs) - - init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...) - - # 4. Potentially add passed objects if expected - missing_modules = set(expected_modules) - set(init_kwargs.keys()) - passed_modules = list(passed_class_obj.keys()) - optional_modules = pipeline_class._optional_components - if len(missing_modules) > 0 and missing_modules <= set(passed_modules + optional_modules): - for module in missing_modules: - init_kwargs[module] = passed_class_obj.get(module, None) - elif len(missing_modules) > 0: - passed_modules = set(list(init_kwargs.keys()) + list(passed_class_obj.keys())) - optional_kwargs - raise ValueError( - f"Pipeline {pipeline_class} expected {expected_modules}, but only {passed_modules} were passed." - ) - - # 5. 
Instantiate the pipeline - model = pipeline_class(**init_kwargs) - - if return_cached_folder: - return model, cached_folder - return model - - @staticmethod - def _get_signature_keys(obj): - parameters = inspect.signature(obj.__init__).parameters - required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} - optional_parameters = set({k for k, v in parameters.items() if v.default != inspect._empty}) - expected_modules = set(required_parameters.keys()) - set(["self"]) - return expected_modules, optional_parameters - - @property - def components(self) -> Dict[str, Any]: - r""" - - The `self.components` property can be useful to run different pipelines with the same weights and - configurations to not have to re-allocate memory. - - Examples: - - ```py - >>> from diffusers import ( - ... StableDiffusionPipeline, - ... StableDiffusionImg2ImgPipeline, - ... StableDiffusionInpaintPipeline, - ... ) - - >>> text2img = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") - >>> img2img = StableDiffusionImg2ImgPipeline(**text2img.components) - >>> inpaint = StableDiffusionInpaintPipeline(**text2img.components) - ``` - - Returns: - A dictionary containing all the modules needed to initialize the pipeline. - """ - expected_modules, optional_parameters = self._get_signature_keys(self) - components = { - k: getattr(self, k) for k in self.config.keys() if not k.startswith("_") and k not in optional_parameters - } - - if set(components.keys()) != expected_modules: - raise ValueError( - f"{self} has been incorrectly initialized or {self.__class__} is incorrectly implemented. Expected" - f" {expected_modules} to be defined, but {components} are defined." - ) - - return components - - @staticmethod - def numpy_to_pil(images): - """ - Convert a numpy image or a batch of images to a PIL image. - """ - if images.ndim == 3: - images = images[None, ...] - images = (images * 255).round().astype("uint8") - if images.shape[-1] == 1: - # special case for grayscale (single channel) images - pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] - else: - pil_images = [Image.fromarray(image) for image in images] - - return pil_images - - def progress_bar(self, iterable=None, total=None): - if not hasattr(self, "_progress_bar_config"): - self._progress_bar_config = {} - elif not isinstance(self._progress_bar_config, dict): - raise ValueError( - f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." - ) - - if iterable is not None: - return tqdm(iterable, **self._progress_bar_config) - elif total is not None: - return tqdm(total=total, **self._progress_bar_config) - else: - raise ValueError("Either `total` or `iterable` has to be defined.") - - def set_progress_bar_config(self, **kwargs): - self._progress_bar_config = kwargs - - def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. 
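`set_progress_bar_config` above simply stores keyword arguments that `progress_bar` later splats into `tqdm`, so any `tqdm` keyword should work. For instance:

```py
from diffusers import DDPMPipeline

pipe = DDPMPipeline.from_pretrained("google/ddpm-cifar10-32")
pipe.set_progress_bar_config(disable=True)  # silence the denoising bar
# or: pipe.set_progress_bar_config(desc="denoising", leave=False)
image = pipe(num_inference_steps=10).images[0]
```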
- - Parameters: - attention_op (`Callable`, *optional*): - Override the default `None` operator for use as `op` argument to the - [`memory_efficient_attention()`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention) - function of xFormers. - - Examples: - - ```py - >>> import torch - >>> from diffusers import DiffusionPipeline - >>> from xformers.ops import MemoryEfficientAttentionFlashAttentionOp - - >>> pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16) - >>> pipe = pipe.to("cuda") - >>> pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp) - >>> # Workaround for not accepting attention shape using VAE for Flash Attention - >>> pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None) - ``` - """ - self.set_use_memory_efficient_attention_xformers(True, attention_op) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.set_use_memory_efficient_attention_xformers(False) - - def set_use_memory_efficient_attention_xformers( - self, valid: bool, attention_op: Optional[Callable] = None - ) -> None: - # Recursively walk through all the children. - # Any children which exposes the set_use_memory_efficient_attention_xformers method - # gets the message - def fn_recursive_set_mem_eff(module: torch.nn.Module): - if hasattr(module, "set_use_memory_efficient_attention_xformers"): - module.set_use_memory_efficient_attention_xformers(valid, attention_op) - - for child in module.children(): - fn_recursive_set_mem_eff(child) - - module_names, _, _ = self.extract_init_dict(dict(self.config)) - for module_name in module_names: - module = getattr(self, module_name) - if isinstance(module, torch.nn.Module): - fn_recursive_set_mem_eff(module) - - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is - provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim` - must be a multiple of `slice_size`. - """ - self.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. 
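A typical use of the slicing switches documented above, trading a small slowdown for lower peak memory (repo id illustrative):

```py
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe.enable_attention_slicing()   # "auto": compute attention in two steps
image = pipe("a photo of an astronaut").images[0]
pipe.disable_attention_slicing()  # back to single-step attention
```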
- """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - - def set_attention_slice(self, slice_size: Optional[int]): - module_names, _, _ = self.extract_init_dict(dict(self.config)) - for module_name in module_names: - module = getattr(self, module_name) - if isinstance(module, torch.nn.Module) and hasattr(module, "set_attention_slice"): - module.set_attention_slice(slice_size) From 0b45377a25ab67ee365e1194dc8a4fe9963b2754 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 9 Feb 2023 18:02:46 +0200 Subject: [PATCH 03/27] improve further --- src/diffusers/pipelines/pipeline_utils.py | 28 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index efd6658f6419..6f6b392bb239 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -154,6 +154,9 @@ def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], if variant_filename not in usable_filenames: usable_filenames.add(f) + if len(variant_filenames) > 0 and usable_filenames != variant_filenames: + logger.warn(f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(usable_filenames - variant_filenames)} from repository files: {', '.join(filenames)}]\nIf this behavior is not expected, please check your folder structure.") + return usable_filenames @@ -500,7 +503,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P ) # retrieve all folder_names that contain relevant files - folder_names = [k for k in config_dict.keys() if not k.startswith("_")] + folder_names = [k for k, v in config_dict.items() if isinstance(v, list)] if not local_files_only: info = model_info( @@ -509,11 +512,14 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P revision=revision, ) model_filenames = variant_compatible_siblings(info, variant=variant) - model_folder_names = set([os.path.split(f) for f in model_filenames]) + model_folder_names = set([os.path.split(f)[0] for f in model_filenames]) + + # all filenames compatible with variant will be added + allow_patterns = list(model_filenames) # allow all patterns from non-model folders # this enables downloading schedulers, tokenizers, ... 
- allow_patterns = [os.path.join(k, "*") for k in folder_names if k not in model_folder_names] + allow_patterns += [os.path.join(k, "*") for k in folder_names if k not in model_folder_names] # also allow downloading config.jsons with the model allow_patterns += [os.path.join(k, "*.json") for k in model_folder_names] @@ -548,8 +554,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P CUSTOM_PIPELINE_FILE_NAME, ] - import ipdb; ipdb.set_trace() - if cls != DiffusionPipeline: requested_pipeline_class = cls.__name__ else: @@ -577,6 +581,18 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P cached_folder = pretrained_model_name_or_path config_dict = cls.load_config(cached_folder) + # retrieve which subfolders should load variants + model_variants = {} + if variant is not None: + for folder in os.listdir(cached_folder): + folder_path = os.path.join(cached_folder, folder) + variant_exists = os.path.isdir(folder_path) and any(path.split(".")[1] == variant for path in os.listdir(folder_path)) + if variant_exists: + model_variants[folder] = variant + + # TODO(PVP) - delete if not needed anymore + print(os.system(f"cd {cached_folder} && tree")) + # 2. Load the pipeline class, if using custom module then load it from the hub # if we load from explicit class, let's use it if custom_pipeline is not None: @@ -772,7 +788,7 @@ def load_module(name, value): # This makes sure that the weights won't be initialized which significantly speeds up loading. if is_diffusers_model or is_transformers_model: loading_kwargs["device_map"] = device_map - loading_kwargs["variant"] = variant + loading_kwargs["variant"] = model_variants.pop(name, None) if from_flax: loading_kwargs["from_flax"] = True From cbe2066defb7c18bca287b3e26c619f8cdcd4396 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 10 Feb 2023 12:56:48 +0200 Subject: [PATCH 04/27] up --- src/diffusers/pipelines/pipeline_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 6f6b392bb239..2a451e14f5be 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -531,7 +531,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P ] if from_flax: - ignore_patterns = ["*.bin", "*.safetensors"] + ignore_patterns = ["*.bin", "*.safetensors", ".onnx"] elif is_safetensors_available() and is_safetensors_compatible(model_filenames): ignore_patterns = ["*.bin", "*.msgpack"] else: @@ -586,7 +586,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P if variant is not None: for folder in os.listdir(cached_folder): folder_path = os.path.join(cached_folder, folder) - variant_exists = os.path.isdir(folder_path) and any(path.split(".")[1] == variant for path in os.listdir(folder_path)) + is_folder = os.path.isdir(folder_path) and folder in config_dict + variant_exists = is_folder and any(path.split(".")[1] == variant for path in os.listdir(folder_path)) if variant_exists: model_variants[folder] = variant From c760708cc8a1c1361409ce582b57b8f5cd602661 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 10 Feb 2023 13:07:14 +0200 Subject: [PATCH 05/27] add tests --- tests/test_modeling_common.py | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index db006790a282..860997672913 
100644 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -66,6 +66,43 @@ def test_from_save_pretrained(self): max_diff = (image - new_image).abs().sum().item() self.assertLessEqual(max_diff, 5e-5, "Models give different forward passes") + def test_from_save_pretrained_variant(self): + init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + + model = self.model_class(**init_dict) + model.to(torch_device) + model.eval() + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, variant="fp16") + new_model = self.model_class.from_pretrained(tmpdirname, variant="fp16") + + with self.assertRaises(OSError) as error_context: + self.model_class.from_pretrained(tmpdirname) + + # make sure that error message states what keys are missing + assert "Error no file named diffusion_pytorch_model.bin found in directory" in str(error_context.exception) + + new_model.to(torch_device) + + with torch.no_grad(): + # Warmup pass when using mps (see #372) + if torch_device == "mps" and isinstance(model, ModelMixin): + _ = model(**self.dummy_input) + _ = new_model(**self.dummy_input) + + image = model(**inputs_dict) + if isinstance(image, dict): + image = image.sample + + new_image = new_model(**inputs_dict) + + if isinstance(new_image, dict): + new_image = new_image.sample + + max_diff = (image - new_image).abs().sum().item() + self.assertLessEqual(max_diff, 5e-5, "Models give different forward passes") + def test_from_save_pretrained_dtype(self): init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() From 8d77537254f9c975c9daeb0cf7531308608c9b83 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 10 Feb 2023 15:41:07 +0200 Subject: [PATCH 06/27] add some first tests --- .../text_to_image/train_text_to_image_lora.py | 4 +-- src/diffusers/pipelines/pipeline_utils.py | 29 +++++-------------- tests/test_modeling_common.py | 27 +++++++++++++++++ tests/test_pipelines.py | 29 +++++++++++++++++++ 4 files changed, 66 insertions(+), 23 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index a3c5bef73a95..abc535594d8c 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -418,9 +418,9 @@ def main(): # freeze parameters of models to save more memory unet.requires_grad_(False) vae.requires_grad_(False) - + text_encoder.requires_grad_(False) - + # For mixed precision training we cast the text_encoder and vae weights to half-precision # as these models are only used for inference, keeping weights in full precision is not required. 
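The new test pins down the intended round trip: weights saved under a variant can only be reloaded by naming that variant. As a usage sketch (donor checkpoint and subfolder are assumptions):

```py
import tempfile
from diffusers import UNet2DModel

unet = UNet2DModel.from_pretrained("google/ddpm-cifar10-32", subfolder="unet")
with tempfile.TemporaryDirectory() as tmp:
    unet.save_pretrained(tmp, variant="fp16")  # writes diffusion_pytorch_model.fp16.bin
    reloaded = UNet2DModel.from_pretrained(tmp, variant="fp16")
    # UNet2DModel.from_pretrained(tmp) would raise OSError: the non-variant
    # diffusion_pytorch_model.bin was never written.
```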
weight_dtype = torch.float32 diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 2a451e14f5be..c469b926d683 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -32,16 +32,12 @@ import diffusers from ..configuration_utils import ConfigMixin -from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, _add_variant +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from ..utils import ( CONFIG_NAME, DIFFUSERS_CACHE, - FLAX_WEIGHTS_NAME, HF_HUB_OFFLINE, - ONNX_WEIGHTS_NAME, - WEIGHTS_NAME, - SAFETENSORS_WEIGHTS_NAME, BaseOutput, deprecate, get_class_from_dynamic_module, @@ -146,7 +142,9 @@ def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], save_formats = ["bin", "safetensors", "msgpack", "onnx"] variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats)) - non_variant_filenames = set(f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats))) + non_variant_filenames = set( + f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats)) + ) usable_filenames = set(variant_filenames) for f in non_variant_filenames: @@ -155,7 +153,9 @@ def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], usable_filenames.add(f) if len(variant_filenames) > 0 and usable_filenames != variant_filenames: - logger.warn(f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(usable_filenames - variant_filenames)} from repository files: {', '.join(filenames)}]\nIf this behavior is not expected, please check your folder structure.") + logger.warn( + f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(usable_filenames - variant_filenames)} from repository files: {', '.join(filenames)}]\nIf this behavior is not expected, please check your folder structure." 
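Concretely, the reflowed warning fires when, say, `fp16` is requested but only some components ship `fp16` weights, so the rest fall back to their non-variant files. A compact repro of the condition, assuming the helper's semantics:

```py
variant_filenames = {"unet/diffusion_pytorch_model.fp16.bin"}
usable_filenames = variant_filenames | {"vae/diffusion_pytorch_model.bin"}

if len(variant_filenames) > 0 and usable_filenames != variant_filenames:
    print("a mixture of fp16 and non-fp16 filenames will be loaded")
```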
+ ) return usable_filenames @@ -539,20 +539,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P else: # allow everything since it has to be downloaded anyways - allow_patterns = [os.path.join(k, "*") for k in folder_names] - - allow_patterns += [ - WEIGHTS_NAME, - _add_variant(WEIGHTS_NAME), - SAFETENSORS_WEIGHTS_NAME, - _add_variant(SAFETENSORS_WEIGHTS_NAME), - SCHEDULER_CONFIG_NAME, - CONFIG_NAME, - ONNX_WEIGHTS_NAME, - FLAX_WEIGHTS_NAME, - cls.config_name, - CUSTOM_PIPELINE_FILE_NAME, - ] + ignore_patterns = allow_patterns = None if cls != DiffusionPipeline: requested_pipeline_class = cls.__name__ diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 860997672913..b6bc522cae6f 100644 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -16,10 +16,12 @@ import inspect import tempfile import unittest +import unittest.mock as mock from typing import Dict, List, Tuple import numpy as np import torch +from requests.exceptions import HTTPError from diffusers.models import ModelMixin, UNet2DConditionModel from diffusers.training_utils import EMAModel @@ -34,6 +36,30 @@ def test_accelerate_loading_error_message(self): # make sure that error message states what keys are missing assert "conv_out.bias" in str(error_context.exception) + def test_cached_files_are_used_when_no_internet(self): + # A mock response for an HTTP head request to emulate server down + response_mock = mock.Mock() + response_mock.status_code = 500 + response_mock.headers = {} + response_mock.raise_for_status.side_effect = HTTPError + response_mock.json.return_value = {} + + # Download this model to make sure it's in the cache. + orig_model = UNet2DConditionModel.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet" + ) + + # Under the mock environment we get a 500 error when trying to reach the model. + with mock.patch("requests.request", return_value=response_mock): + # Download this model to make sure it's in the cache. + model = UNet2DConditionModel.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet", local_files_only=True + ) + + for p1, p2 in zip(orig_model.parameters(), model.parameters()): + if p1.data.ne(p2.data).sum() > 0: + assert False, "Parameters not the same!" 
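The offline test added here follows a reusable recipe: warm the cache, then make every `requests.request` call fail so any network access is unmistakable. A trimmed skeleton of that recipe:

```py
import unittest.mock as mock
from requests.exceptions import HTTPError

response_500 = mock.Mock(status_code=500, headers={})
response_500.raise_for_status.side_effect = HTTPError
response_500.json.return_value = {}

with mock.patch("requests.request", return_value=response_500):
    # anything loaded here with `local_files_only=True` must come from the
    # cache populated beforehand; a real download attempt would hit the mock.
    pass
```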
+ class ModelTesterMixin: def test_from_save_pretrained(self): @@ -77,6 +103,7 @@ def test_from_save_pretrained_variant(self): model.save_pretrained(tmpdirname, variant="fp16") new_model = self.model_class.from_pretrained(tmpdirname, variant="fp16") + # non-variant cannot be loaded with self.assertRaises(OSError) as error_context: self.model_class.from_pretrained(tmpdirname) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 71340d43b0a9..65f79dde26a7 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -21,6 +21,7 @@ import sys import tempfile import unittest +import unittest.mock as mock import numpy as np import PIL @@ -28,6 +29,7 @@ import torch from parameterized import parameterized from PIL import Image +from requests.exceptions import HTTPError from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer from diffusers import ( @@ -166,6 +168,33 @@ def test_load_no_safety_checker_default_locally(self): assert np.max(np.abs(out - out_2)) < 1e-3 + def test_cached_files_are_used_when_no_internet(self): + # A mock response for an HTTP head request to emulate server down + response_mock = mock.Mock() + response_mock.status_code = 500 + response_mock.headers = {} + response_mock.raise_for_status.side_effect = HTTPError + response_mock.json.return_value = {} + + # Download this model to make sure it's in the cache. + orig_pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + orig_comps = {k: v for k, v in orig_pipe.components.items() if hasattr(v, "parameters")} + + # Under the mock environment we get a 500 error when trying to reach the model. + with mock.patch("requests.request", return_value=response_mock): + # Download this model to make sure it's in the cache. + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None, local_files_only=True + ) + comps = {k: v for k, v in pipe.components.items() if hasattr(v, "parameters")} + + for m1, m2 in zip(orig_comps.values(), comps.values()): + for p1, p2 in zip(m1.parameters(), m2.parameters()): + if p1.data.ne(p2.data).sum() > 0: + assert False, "Parameters not the same!" + class CustomPipelineTests(unittest.TestCase): def test_load_custom_pipeline(self): From 4f6d13c541459aa3665d1d3260a7e3c1775bee84 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 10 Feb 2023 17:12:00 +0200 Subject: [PATCH 07/27] up --- @ | 1017 +++++++++++++++++++++ src/diffusers/pipelines/pipeline_utils.py | 16 +- tests/test_pipelines.py | 122 +++ 3 files changed, 1148 insertions(+), 7 deletions(-) create mode 100644 @ diff --git a/@ b/@ new file mode 100644 index 000000000000..5c561ede3d02 --- /dev/null +++ b/@ @@ -0,0 +1,1017 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import gc +import json +import os +import random +import shutil +import sys +import tempfile +import unittest +import unittest.mock as mock + +import numpy as np +import PIL +import safetensors.torch +import torch +from parameterized import parameterized +from PIL import Image +from requests.exceptions import HTTPError +from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + DDIMPipeline, + DDIMScheduler, + DDPMPipeline, + DDPMScheduler, + DiffusionPipeline, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipelineLegacy, + StableDiffusionPipeline, + UNet2DConditionModel, + UNet2DModel, + logging, +) +from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME +from diffusers.utils import CONFIG_NAME, WEIGHTS_NAME, floats_tensor, is_flax_available, nightly, slow, torch_device +from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir, require_torch_gpu + + +torch.backends.cuda.matmul.allow_tf32 = False + + +class DownloadTests(unittest.TestCase): + def test_download_only_pytorch(self): + with tempfile.TemporaryDirectory() as tmpdirname: + # pipeline has Flax weights + _ = DiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None, cache_dir=tmpdirname + ) + + all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] + files = [item for sublist in all_root_files for item in sublist] + + # None of the downloaded files should be a flax file even if we have some here: + # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_flax_model.msgpack + assert not any(f.endswith(".msgpack") for f in files) + # We need to never convert this tiny model to safetensors for this test to pass + assert not any(f.endswith(".safetensors") for f in files) + + def test_returned_cached_folder(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + _, local_path = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None, return_cached_folder=True + ) + pipe_2 = StableDiffusionPipeline.from_pretrained(local_path) + + pipe = pipe.to(torch_device) + pipe_2 = pipe_2.to(torch_device) + + generator = torch.manual_seed(0) + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + generator = torch.manual_seed(0) + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + + def test_download_safetensors(self): + with tempfile.TemporaryDirectory() as tmpdirname: + # pipeline has Flax weights + _ = DiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-pipe-safetensors", + safety_checker=None, + cache_dir=tmpdirname, + ) + + all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] + files = [item for sublist in all_root_files for item in sublist] + + # None of the downloaded files should be a pytorch file even if we have some here: + # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_flax_model.msgpack + assert not 
any(f.endswith(".bin") for f in files) + + def test_download_no_safety_checker(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + pipe = pipe.to(torch_device) + generator = torch.manual_seed(0) + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + pipe_2 = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") + pipe_2 = pipe_2.to(torch_device) + generator = torch.manual_seed(0) + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + + def test_load_no_safety_checker_explicit_locally(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + pipe = pipe.to(torch_device) + generator = torch.manual_seed(0) + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None) + pipe_2 = pipe_2.to(torch_device) + + generator = torch.manual_seed(0) + + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + + def test_load_no_safety_checker_default_locally(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") + pipe = pipe.to(torch_device) + + generator = torch.manual_seed(0) + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname) + pipe_2 = pipe_2.to(torch_device) + + generator = torch.manual_seed(0) + + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + + def test_cached_files_are_used_when_no_internet(self): + # A mock response for an HTTP head request to emulate server down + response_mock = mock.Mock() + response_mock.status_code = 500 + response_mock.headers = {} + response_mock.raise_for_status.side_effect = HTTPError + response_mock.json.return_value = {} + + # Download this model to make sure it's in the cache. + orig_pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + orig_comps = {k: v for k, v in orig_pipe.components.items() if hasattr(v, "parameters")} + + # Under the mock environment we get a 500 error when trying to reach the model. + with mock.patch("requests.request", return_value=response_mock): + # Download this model to make sure it's in the cache. + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None, local_files_only=True + ) + comps = {k: v for k, v in pipe.components.items() if hasattr(v, "parameters")} + + for m1, m2 in zip(orig_comps.values(), comps.values()): + for p1, p2 in zip(m1.parameters(), m2.parameters()): + if p1.data.ne(p2.data).sum() > 0: + assert False, "Parameters not the same!" 
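The repeated `os.walk` idiom in these download tests flattens every file under the hub cache's `snapshots` directory. Factored out, assuming the same cache layout the tests rely on:

```py
import os

def snapshot_files(cache_dir: str) -> list:
    """Collect all filenames below <cache_dir>/<repo_folder>/snapshots."""
    repo_folder = os.listdir(cache_dir)[0]
    snapshots = os.path.join(cache_dir, repo_folder, "snapshots")
    return [f for _, _, filenames in os.walk(snapshots) for f in filenames]
```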
+
+    def test_download_from_variant_folder(self):
+        for safe_avail in [False, True]:
+            import diffusers
+
+            diffusers.utils.import_utils._safetensors_available = safe_avail
+
+            other_format = ".bin" if safe_avail else ".safetensors"
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                StableDiffusionPipeline.from_pretrained(
+                    "hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname
+                )
+                all_root_files = [
+                    t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))
+                ]
+                files = [item for sublist in all_root_files for item in sublist]
+
+                # None of the downloaded files should be a variant file even if we have some here:
+                # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet
+                assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
+                assert not any(f.endswith(other_format) for f in files)
+                # no variants
+                assert not any(len(f.split(".")) == 3 for f in files)
+
+            diffusers.utils.import_utils._safetensors_available = True
+
+    def test_download_variant_all(self):
+        for safe_avail in [False, True]:
+            import diffusers
+
+            diffusers.utils.import_utils._safetensors_available = safe_avail
+
+            other_format = ".bin" if safe_avail else ".safetensors"
+            this_format = ".safetensors" if safe_avail else ".bin"
+            variant = "fp16"
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                StableDiffusionPipeline.from_pretrained(
+                    "hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname, variant=variant
+                )
+                all_root_files = [
+                    t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))
+                ]
+                files = [item for sublist in all_root_files for item in sublist]
+
+                # None of the downloaded files should be a non-variant file even if we have some here:
+                # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet
+                assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
+                # unet, vae, text_encoder, safety_checker
+                assert len([f for f in files if f.endswith(f"{variant}{this_format}")]) == 4
+                # all checkpoints should have variant ending
+                assert not any(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files)
+                assert not any(f.endswith(other_format) for f in files)
+
+            diffusers.utils.import_utils._safetensors_available = True
+
+    def test_download_variant_partly(self):
+        for safe_avail in [False, True]:
+            import diffusers
+
+            diffusers.utils.import_utils._safetensors_available = safe_avail
+
+            other_format = ".bin" if safe_avail else ".safetensors"
+            this_format = ".safetensors" if safe_avail else ".bin"
+            variant = "no_ema"
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                StableDiffusionPipeline.from_pretrained(
+                    "hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname, variant=variant
+                )
+                snapshots = os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots")
+                all_root_files = [t[-1] for t in os.walk(snapshots)]
+                files = [item for sublist in all_root_files for item in sublist]
+
+                # Some of the downloaded files should be non-variant files, check:
+                # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet
+                assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
+                # only unet has "no_ema" variant
+                assert len([f for f in files if f.endswith(f"{variant}{this_format}")]) == 1
+                # vae, safety_checker and text_encoder should have no variant
+                
assert sum(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files) == 3 + assert not any(f.endswith(other_format) for f in files) + + diffusers.utils.import_utils._safetensors_available = True + + +class CustomPipelineTests(unittest.TestCase): + def test_load_custom_pipeline(self): + pipeline = DiffusionPipeline.from_pretrained( + "google/ddpm-cifar10-32", custom_pipeline="hf-internal-testing/diffusers-dummy-pipeline" + ) + pipeline = pipeline.to(torch_device) + # NOTE that `"CustomPipeline"` is not a class that is defined in this library, but solely on the Hub + # under https://huggingface.co/hf-internal-testing/diffusers-dummy-pipeline/blob/main/pipeline.py#L24 + assert pipeline.__class__.__name__ == "CustomPipeline" + + def test_load_custom_github(self): + pipeline = DiffusionPipeline.from_pretrained( + "google/ddpm-cifar10-32", custom_pipeline="one_step_unet", custom_revision="main" + ) + + # make sure that on "main" pipeline gives only ones because of: https://github.com/huggingface/diffusers/pull/1690 + with torch.no_grad(): + output = pipeline() + + assert output.numel() == output.sum() + + # hack since Python doesn't like overwriting modules: https://stackoverflow.com/questions/3105801/unload-a-module-in-python + # Could in the future work with hashes instead. + del sys.modules["diffusers_modules.git.one_step_unet"] + + pipeline = DiffusionPipeline.from_pretrained( + "google/ddpm-cifar10-32", custom_pipeline="one_step_unet", custom_revision="0.10.2" + ) + with torch.no_grad(): + output = pipeline() + + assert output.numel() != output.sum() + + assert pipeline.__class__.__name__ == "UnetSchedulerOneForwardPipeline" + + def test_run_custom_pipeline(self): + pipeline = DiffusionPipeline.from_pretrained( + "google/ddpm-cifar10-32", custom_pipeline="hf-internal-testing/diffusers-dummy-pipeline" + ) + pipeline = pipeline.to(torch_device) + images, output_str = pipeline(num_inference_steps=2, output_type="np") + + assert images[0].shape == (1, 32, 32, 3) + + # compare output to https://huggingface.co/hf-internal-testing/diffusers-dummy-pipeline/blob/main/pipeline.py#L102 + assert output_str == "This is a test" + + def test_local_custom_pipeline_repo(self): + local_custom_pipeline_path = get_tests_dir("fixtures/custom_pipeline") + pipeline = DiffusionPipeline.from_pretrained( + "google/ddpm-cifar10-32", custom_pipeline=local_custom_pipeline_path + ) + pipeline = pipeline.to(torch_device) + images, output_str = pipeline(num_inference_steps=2, output_type="np") + + assert pipeline.__class__.__name__ == "CustomLocalPipeline" + assert images[0].shape == (1, 32, 32, 3) + # compare to https://github.com/huggingface/diffusers/blob/main/tests/fixtures/custom_pipeline/pipeline.py#L102 + assert output_str == "This is a local test" + + def test_local_custom_pipeline_file(self): + local_custom_pipeline_path = get_tests_dir("fixtures/custom_pipeline") + local_custom_pipeline_path = os.path.join(local_custom_pipeline_path, "what_ever.py") + pipeline = DiffusionPipeline.from_pretrained( + "google/ddpm-cifar10-32", custom_pipeline=local_custom_pipeline_path + ) + pipeline = pipeline.to(torch_device) + images, output_str = pipeline(num_inference_steps=2, output_type="np") + + assert pipeline.__class__.__name__ == "CustomLocalPipeline" + assert images[0].shape == (1, 32, 32, 3) + # compare to https://github.com/huggingface/diffusers/blob/main/tests/fixtures/custom_pipeline/pipeline.py#L102 + assert output_str == "This is a local test" + + @slow + 
@require_torch_gpu
+    def test_load_pipeline_from_git(self):
+        clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
+
+        feature_extractor = CLIPFeatureExtractor.from_pretrained(clip_model_id)
+        clip_model = CLIPModel.from_pretrained(clip_model_id, torch_dtype=torch.float16)
+
+        pipeline = DiffusionPipeline.from_pretrained(
+            "CompVis/stable-diffusion-v1-4",
+            custom_pipeline="clip_guided_stable_diffusion",
+            clip_model=clip_model,
+            feature_extractor=feature_extractor,
+            torch_dtype=torch.float16,
+        )
+        pipeline.enable_attention_slicing()
+        pipeline = pipeline.to(torch_device)
+
+        # NOTE that `"CLIPGuidedStableDiffusion"` is not a class that is defined in the PyPI package of the library, but solely in the community examples folder on GitHub under:
+        # https://github.com/huggingface/diffusers/blob/main/examples/community/clip_guided_stable_diffusion.py
+        assert pipeline.__class__.__name__ == "CLIPGuidedStableDiffusion"
+
+        image = pipeline("a prompt", num_inference_steps=2, output_type="np").images[0]
+        assert image.shape == (512, 512, 3)
+
+
+class PipelineFastTests(unittest.TestCase):
+    def tearDown(self):
+        # clean up the VRAM after each test
+        super().tearDown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        import diffusers
+
+        diffusers.utils.import_utils._safetensors_available = True
+
+    def dummy_image(self):
+        batch_size = 1
+        num_channels = 3
+        sizes = (32, 32)
+
+        image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device)
+        return image
+
+    def dummy_uncond_unet(self, sample_size=32):
+        torch.manual_seed(0)
+        model = UNet2DModel(
+            block_out_channels=(32, 64),
+            layers_per_block=2,
+            sample_size=sample_size,
+            in_channels=3,
+            out_channels=3,
+            down_block_types=("DownBlock2D", "AttnDownBlock2D"),
+            up_block_types=("AttnUpBlock2D", "UpBlock2D"),
+        )
+        return model
+
+    def dummy_cond_unet(self, sample_size=32):
+        torch.manual_seed(0)
+        model = UNet2DConditionModel(
+            block_out_channels=(32, 64),
+            layers_per_block=2,
+            sample_size=sample_size,
+            in_channels=4,
+            out_channels=4,
+            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
+            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
+            cross_attention_dim=32,
+        )
+        return model
+
+    @property
+    def dummy_vae(self):
+        torch.manual_seed(0)
+        model = AutoencoderKL(
+            block_out_channels=[32, 64],
+            in_channels=3,
+            out_channels=3,
+            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            latent_channels=4,
+        )
+        return model
+
+    @property
+    def dummy_text_encoder(self):
+        torch.manual_seed(0)
+        config = CLIPTextConfig(
+            bos_token_id=0,
+            eos_token_id=2,
+            hidden_size=32,
+            intermediate_size=37,
+            layer_norm_eps=1e-05,
+            num_attention_heads=4,
+            num_hidden_layers=5,
+            pad_token_id=1,
+            vocab_size=1000,
+        )
+        return CLIPTextModel(config)
+
+    @property
+    def dummy_extractor(self):
+        def extract(*args, **kwargs):
+            class Out:
+                def __init__(self):
+                    self.pixel_values = torch.ones([0])
+
+                def to(self, device):
+                    # Tensor.to is not in-place, so reassign the moved tensor
+                    self.pixel_values = self.pixel_values.to(device)
+                    return self
+
+            return Out()
+
+        return extract
+
+    @parameterized.expand(
+        [
+            [DDIMScheduler, DDIMPipeline, 32],
+            [DDPMScheduler, DDPMPipeline, 32],
+            [DDIMScheduler, DDIMPipeline, (32, 64)],
+            [DDPMScheduler, DDPMPipeline, (64, 32)],
+        ]
+    )
+    def test_uncond_unet_components(self, scheduler_fn=DDPMScheduler, pipeline_fn=DDPMPipeline, sample_size=32):
+        unet = self.dummy_uncond_unet(sample_size)
+        scheduler = scheduler_fn()
+        pipeline = pipeline_fn(unet, 
scheduler).to(torch_device) + + generator = torch.manual_seed(0) + out_image = pipeline( + generator=generator, + num_inference_steps=2, + output_type="np", + ).images + sample_size = (sample_size, sample_size) if isinstance(sample_size, int) else sample_size + assert out_image.shape == (1, *sample_size, 3) + + def test_stable_diffusion_components(self): + """Test that components property works correctly""" + unet = self.dummy_cond_unet() + scheduler = PNDMScheduler(skip_prk_steps=True) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image().cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB") + mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((32, 32)) + + # make sure here that pndm scheduler skips prk + inpaint = StableDiffusionInpaintPipelineLegacy( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=self.dummy_extractor, + ).to(torch_device) + img2img = StableDiffusionImg2ImgPipeline(**inpaint.components).to(torch_device) + text2img = StableDiffusionPipeline(**inpaint.components).to(torch_device) + + prompt = "A painting of a squirrel eating a burger" + + generator = torch.manual_seed(0) + image_inpaint = inpaint( + [prompt], + generator=generator, + num_inference_steps=2, + output_type="np", + image=init_image, + mask_image=mask_image, + ).images + image_img2img = img2img( + [prompt], + generator=generator, + num_inference_steps=2, + output_type="np", + image=init_image, + ).images + image_text2img = text2img( + [prompt], + generator=generator, + num_inference_steps=2, + output_type="np", + ).images + + assert image_inpaint.shape == (1, 32, 32, 3) + assert image_img2img.shape == (1, 32, 32, 3) + assert image_text2img.shape == (1, 64, 64, 3) + + def test_set_scheduler(self): + unet = self.dummy_cond_unet() + scheduler = PNDMScheduler(skip_prk_steps=True) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + sd = StableDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=self.dummy_extractor, + ) + + sd.scheduler = DDIMScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, DDIMScheduler) + sd.scheduler = DDPMScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, DDPMScheduler) + sd.scheduler = PNDMScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, PNDMScheduler) + sd.scheduler = LMSDiscreteScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, LMSDiscreteScheduler) + sd.scheduler = EulerDiscreteScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, EulerDiscreteScheduler) + sd.scheduler = EulerAncestralDiscreteScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, EulerAncestralDiscreteScheduler) + sd.scheduler = DPMSolverMultistepScheduler.from_config(sd.scheduler.config) + assert isinstance(sd.scheduler, DPMSolverMultistepScheduler) + + def test_set_scheduler_consistency(self): + unet = self.dummy_cond_unet() + pndm = PNDMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler") + ddim = DDIMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", 
subfolder="scheduler")
+        vae = self.dummy_vae
+        bert = self.dummy_text_encoder
+        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+
+        sd = StableDiffusionPipeline(
+            unet=unet,
+            scheduler=pndm,
+            vae=vae,
+            text_encoder=bert,
+            tokenizer=tokenizer,
+            safety_checker=None,
+            feature_extractor=self.dummy_extractor,
+        )
+
+        pndm_config = sd.scheduler.config
+        sd.scheduler = DDPMScheduler.from_config(pndm_config)
+        sd.scheduler = PNDMScheduler.from_config(sd.scheduler.config)
+        pndm_config_2 = sd.scheduler.config
+        pndm_config_2 = {k: v for k, v in pndm_config_2.items() if k in pndm_config}
+
+        assert dict(pndm_config) == dict(pndm_config_2)
+
+        sd = StableDiffusionPipeline(
+            unet=unet,
+            scheduler=ddim,
+            vae=vae,
+            text_encoder=bert,
+            tokenizer=tokenizer,
+            safety_checker=None,
+            feature_extractor=self.dummy_extractor,
+        )
+
+        ddim_config = sd.scheduler.config
+        sd.scheduler = LMSDiscreteScheduler.from_config(ddim_config)
+        sd.scheduler = DDIMScheduler.from_config(sd.scheduler.config)
+        ddim_config_2 = sd.scheduler.config
+        ddim_config_2 = {k: v for k, v in ddim_config_2.items() if k in ddim_config}
+
+        assert dict(ddim_config) == dict(ddim_config_2)
+
+    def test_save_safe_serialization(self):
+        pipeline = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch")
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            pipeline.save_pretrained(tmpdirname, safe_serialization=True)
+
+            # Validate that the VAE safetensors file exists and is of the correct format
+            vae_path = os.path.join(tmpdirname, "vae", "diffusion_pytorch_model.safetensors")
+            assert os.path.exists(vae_path), f"Could not find {vae_path}"
+            _ = safetensors.torch.load_file(vae_path)
+
+            # Validate that the UNet safetensors file exists and is of the correct format
+            unet_path = os.path.join(tmpdirname, "unet", "diffusion_pytorch_model.safetensors")
+            assert os.path.exists(unet_path), f"Could not find {unet_path}"
+            _ = safetensors.torch.load_file(unet_path)
+
+            # Validate that the text encoder safetensors file exists and is of the correct format
+            text_encoder_path = os.path.join(tmpdirname, "text_encoder", "model.safetensors")
+            assert os.path.exists(text_encoder_path), f"Could not find {text_encoder_path}"
+            _ = safetensors.torch.load_file(text_encoder_path)
+
+            pipeline = StableDiffusionPipeline.from_pretrained(tmpdirname)
+            assert pipeline.unet is not None
+            assert pipeline.vae is not None
+            assert pipeline.text_encoder is not None
+            assert pipeline.scheduler is not None
+            assert pipeline.feature_extractor is not None
+
+    def test_no_pytorch_download_when_doing_safetensors(self):
+        # when safetensors is available, the PyTorch weights are not downloaded by default
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            _ = StableDiffusionPipeline.from_pretrained(
+                "hf-internal-testing/diffusers-stable-diffusion-tiny-all", cache_dir=tmpdirname
+            )
+
+            path = os.path.join(
+                tmpdirname,
+                "models--hf-internal-testing--diffusers-stable-diffusion-tiny-all",
+                "snapshots",
+                "07838d72e12f9bcec1375b0482b80c1d399be843",
+                "unet",
+            )
+            # safetensors exists
+            assert os.path.exists(os.path.join(path, "diffusion_pytorch_model.safetensors"))
+            # pytorch does not
+            assert not os.path.exists(os.path.join(path, "diffusion_pytorch_model.bin"))
+
+    def test_no_safetensors_download_when_doing_pytorch(self):
+        # mock diffusers safetensors not available
+        import diffusers
+
+        diffusers.utils.import_utils._safetensors_available = False
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            _ = StableDiffusionPipeline.from_pretrained(
+                "hf-internal-testing/diffusers-stable-diffusion-tiny-all", cache_dir=tmpdirname
+            )
+
+            path = os.path.join(
+                tmpdirname,
+                "models--hf-internal-testing--diffusers-stable-diffusion-tiny-all",
+                "snapshots",
+                "07838d72e12f9bcec1375b0482b80c1d399be843",
+                "unet",
+            )
+            # safetensors does not exist
+            assert not os.path.exists(os.path.join(path, "diffusion_pytorch_model.safetensors"))
+            # pytorch does
+            assert os.path.exists(os.path.join(path, "diffusion_pytorch_model.bin"))
+
+        diffusers.utils.import_utils._safetensors_available = True
+
+    def test_optional_components(self):
+        unet = self.dummy_cond_unet()
+        pndm = PNDMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler")
+        vae = self.dummy_vae
+        bert = self.dummy_text_encoder
+        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+
+        orig_sd = StableDiffusionPipeline(
+            unet=unet,
+            scheduler=pndm,
+            vae=vae,
+            text_encoder=bert,
+            tokenizer=tokenizer,
+            safety_checker=unet,  # the unet stands in as a dummy non-None safety checker
+            feature_extractor=self.dummy_extractor,
+        )
+        sd = orig_sd
+
+        assert sd.config.requires_safety_checker is True
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            sd.save_pretrained(tmpdirname)
+
+            # Test that passing None works
+            sd = StableDiffusionPipeline.from_pretrained(
+                tmpdirname, feature_extractor=None, safety_checker=None, requires_safety_checker=False
+            )
+
+            assert sd.config.requires_safety_checker is False
+            assert sd.config.safety_checker == (None, None)
+            assert sd.config.feature_extractor == (None, None)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            sd.save_pretrained(tmpdirname)
+
+            # Test that loading previous None works
+            sd = StableDiffusionPipeline.from_pretrained(tmpdirname)
+
+            assert sd.config.requires_safety_checker is False
+            assert sd.config.safety_checker == (None, None)
+            assert sd.config.feature_extractor == (None, None)
+
+            orig_sd.save_pretrained(tmpdirname)
+
+            # Test that loading without any directory works
+            shutil.rmtree(os.path.join(tmpdirname, "safety_checker"))
+            with open(os.path.join(tmpdirname, sd.config_name)) as f:
+                config = json.load(f)
+            config["safety_checker"] = [None, None]
+            with open(os.path.join(tmpdirname, sd.config_name), "w") as f:
+                json.dump(config, f)
+
+            sd = StableDiffusionPipeline.from_pretrained(tmpdirname, requires_safety_checker=False)
+            sd.save_pretrained(tmpdirname)
+            sd = StableDiffusionPipeline.from_pretrained(tmpdirname)
+
+            assert sd.config.requires_safety_checker is False
+            assert sd.config.safety_checker == (None, None)
+            assert sd.config.feature_extractor == (None, None)
+
+            # Test that loading from deleted model index works
+            with open(os.path.join(tmpdirname, sd.config_name)) as f:
+                config = json.load(f)
+            del config["safety_checker"]
+            del config["feature_extractor"]
+            with open(os.path.join(tmpdirname, sd.config_name), "w") as f:
+                json.dump(config, f)
+
+            sd = StableDiffusionPipeline.from_pretrained(tmpdirname)
+
+            assert sd.config.requires_safety_checker is False
+            assert sd.config.safety_checker == (None, None)
+            assert sd.config.feature_extractor == (None, None)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            sd.save_pretrained(tmpdirname)
+
+            # Test that partially loading works
+            sd = StableDiffusionPipeline.from_pretrained(tmpdirname, feature_extractor=self.dummy_extractor)
+
+            assert sd.config.requires_safety_checker is False
+            assert sd.config.safety_checker == (None, None)
+            assert sd.config.feature_extractor != (None, None)
+
+            # Test that partially loading works when passing additional components
+            sd = 
StableDiffusionPipeline.from_pretrained(
+                tmpdirname,
+                feature_extractor=self.dummy_extractor,
+                safety_checker=unet,
+                requires_safety_checker=[True, True],
+            )
+
+            assert sd.config.requires_safety_checker == [True, True]
+            assert sd.config.safety_checker != (None, None)
+            assert sd.config.feature_extractor != (None, None)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            sd.save_pretrained(tmpdirname)
+            sd = StableDiffusionPipeline.from_pretrained(tmpdirname, feature_extractor=self.dummy_extractor)
+
+            assert sd.config.requires_safety_checker == [True, True]
+            assert sd.config.safety_checker != (None, None)
+            assert sd.config.feature_extractor != (None, None)
+
+
+@slow
+@require_torch_gpu
+class PipelineSlowTests(unittest.TestCase):
+    def tearDown(self):
+        # clean up the VRAM after each test
+        super().tearDown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    def test_smart_download(self):
+        model_id = "hf-internal-testing/unet-pipeline-dummy"
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            _ = DiffusionPipeline.from_pretrained(model_id, cache_dir=tmpdirname, force_download=True)
+            local_repo_name = "--".join(["models"] + model_id.split("/"))
+            snapshot_dir = os.path.join(tmpdirname, local_repo_name, "snapshots")
+            snapshot_dir = os.path.join(snapshot_dir, os.listdir(snapshot_dir)[0])
+
+            # inspect all downloaded files to make sure that everything is included
+            assert os.path.isfile(os.path.join(snapshot_dir, DiffusionPipeline.config_name))
+            assert os.path.isfile(os.path.join(snapshot_dir, CONFIG_NAME))
+            assert os.path.isfile(os.path.join(snapshot_dir, SCHEDULER_CONFIG_NAME))
+            assert os.path.isfile(os.path.join(snapshot_dir, WEIGHTS_NAME))
+            assert os.path.isfile(os.path.join(snapshot_dir, "scheduler", SCHEDULER_CONFIG_NAME))
+            assert os.path.isfile(os.path.join(snapshot_dir, "unet", WEIGHTS_NAME))
+            # let's make sure the super large numpy file:
+            # https://huggingface.co/hf-internal-testing/unet-pipeline-dummy/blob/main/big_array.npy
+            # is not downloaded, while all the expected files are
+            assert not os.path.isfile(os.path.join(snapshot_dir, "big_array.npy"))
+
+    def test_warning_unused_kwargs(self):
+        model_id = "hf-internal-testing/unet-pipeline-dummy"
+        logger = logging.get_logger("diffusers.pipelines")
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            with CaptureLogger(logger) as cap_logger:
+                DiffusionPipeline.from_pretrained(
+                    model_id,
+                    not_used=True,
+                    cache_dir=tmpdirname,
+                    force_download=True,
+                )
+
+        assert (
+            cap_logger.out
+            == "Keyword arguments {'not_used': True} are not expected by DDPMPipeline and will be ignored.\n"
+        )
+
+    def test_from_save_pretrained(self):
+        # 1. Load models
+        model = UNet2DModel(
+            block_out_channels=(32, 64),
+            layers_per_block=2,
+            sample_size=32,
+            in_channels=3,
+            out_channels=3,
+            down_block_types=("DownBlock2D", "AttnDownBlock2D"),
+            up_block_types=("AttnUpBlock2D", "UpBlock2D"),
+        )
+        scheduler = DDPMScheduler(num_train_timesteps=10)
+
+        ddpm = DDPMPipeline(model, scheduler)
+        ddpm.to(torch_device)
+        ddpm.set_progress_bar_config(disable=None)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            ddpm.save_pretrained(tmpdirname)
+            new_ddpm = DDPMPipeline.from_pretrained(tmpdirname)
+            new_ddpm.to(torch_device)
+
+        generator = torch.Generator(device=torch_device).manual_seed(0)
+        image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images
+
+        generator = torch.Generator(device=torch_device).manual_seed(0)
+        new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images
+
+        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+
+    def test_from_pretrained_hub(self):
+        model_path = "google/ddpm-cifar10-32"
+
+        scheduler = DDPMScheduler(num_train_timesteps=10)
+
+        ddpm = DDPMPipeline.from_pretrained(model_path, scheduler=scheduler)
+        ddpm = ddpm.to(torch_device)
+        ddpm.set_progress_bar_config(disable=None)
+
+        ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler)
+        ddpm_from_hub = ddpm_from_hub.to(torch_device)
+        ddpm_from_hub.set_progress_bar_config(disable=None)
+
+        generator = torch.Generator(device=torch_device).manual_seed(0)
+        image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images
+
+        generator = torch.Generator(device=torch_device).manual_seed(0)
+        new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images
+
+        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+
+    def test_from_pretrained_hub_pass_model(self):
+        model_path = "google/ddpm-cifar10-32"
+
+        scheduler = DDPMScheduler(num_train_timesteps=10)
+
+        # pass unet into DiffusionPipeline
+        unet = UNet2DModel.from_pretrained(model_path)
+        ddpm_from_hub_custom_model = DiffusionPipeline.from_pretrained(model_path, unet=unet, scheduler=scheduler)
+        ddpm_from_hub_custom_model = ddpm_from_hub_custom_model.to(torch_device)
+        ddpm_from_hub_custom_model.set_progress_bar_config(disable=None)
+
+        ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler)
+        ddpm_from_hub = ddpm_from_hub.to(torch_device)
+        ddpm_from_hub.set_progress_bar_config(disable=None)
+
+        generator = torch.Generator(device=torch_device).manual_seed(0)
+        image = ddpm_from_hub_custom_model(generator=generator, num_inference_steps=5, output_type="numpy").images
+
+        generator = torch.Generator(device=torch_device).manual_seed(0)
+        new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images
+
+        assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
+
+    def test_output_format(self):
+        model_path = "google/ddpm-cifar10-32"
+
+        scheduler = DDIMScheduler.from_pretrained(model_path)
+        pipe = DDIMPipeline.from_pretrained(model_path, scheduler=scheduler)
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        images = pipe(output_type="numpy").images
+        assert images.shape == (1, 32, 32, 3)
+        assert isinstance(images, np.ndarray)
+
+        images = pipe(output_type="pil", num_inference_steps=4).images
+        assert isinstance(images, list)
+        assert len(images) == 1
+        assert 
isinstance(images[0], PIL.Image.Image) + + # use PIL by default + images = pipe(num_inference_steps=4).images + assert isinstance(images, list) + assert isinstance(images[0], PIL.Image.Image) + + def test_from_flax_from_pt(self): + pipe_pt = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + pipe_pt.to(torch_device) + + if not is_flax_available(): + raise ImportError("Make sure flax is installed.") + + from diffusers import FlaxStableDiffusionPipeline + + with tempfile.TemporaryDirectory() as tmpdirname: + pipe_pt.save_pretrained(tmpdirname) + + pipe_flax, params = FlaxStableDiffusionPipeline.from_pretrained( + tmpdirname, safety_checker=None, from_pt=True + ) + + with tempfile.TemporaryDirectory() as tmpdirname: + pipe_flax.save_pretrained(tmpdirname, params=params) + pipe_pt_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None, from_flax=True) + pipe_pt_2.to(torch_device) + + prompt = "Hello" + + generator = torch.manual_seed(0) + image_0 = pipe_pt( + [prompt], + generator=generator, + num_inference_steps=2, + output_type="np", + ).images[0] + + generator = torch.manual_seed(0) + image_1 = pipe_pt_2( + [prompt], + generator=generator, + num_inference_steps=2, + output_type="np", + ).images[0] + + assert np.abs(image_0 - image_1).sum() < 1e-5, "Models don't give the same forward pass" + + +@nightly +@require_torch_gpu +class PipelineNightlyTests(unittest.TestCase): + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + def test_ddpm_ddim_equality_batched(self): + seed = 0 + model_id = "google/ddpm-cifar10-32" + + unet = UNet2DModel.from_pretrained(model_id) + ddpm_scheduler = DDPMScheduler() + ddim_scheduler = DDIMScheduler() + + ddpm = DDPMPipeline(unet=unet, scheduler=ddpm_scheduler) + ddpm.to(torch_device) + ddpm.set_progress_bar_config(disable=None) + + ddim = DDIMPipeline(unet=unet, scheduler=ddim_scheduler) + ddim.to(torch_device) + ddim.set_progress_bar_config(disable=None) + + generator = torch.Generator(device=torch_device).manual_seed(seed) + ddpm_images = ddpm(batch_size=2, generator=generator, output_type="numpy").images + + generator = torch.Generator(device=torch_device).manual_seed(seed) + ddim_images = ddim( + batch_size=2, + generator=generator, + num_inference_steps=1000, + eta=1.0, + output_type="numpy", + use_clipped_model_output=True, # Need this to make DDIM match DDPM + ).images + + # the values aren't exactly equal, but the images look the same visually + assert np.abs(ddpm_images - ddim_images).max() < 1e-1 diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index c469b926d683..8c710be398f3 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -120,13 +120,13 @@ class AudioPipelineOutput(BaseOutput): def is_safetensors_compatible(filenames, variant=None) -> bool: pt_filenames = set(filename for filename in filenames if filename.endswith(".bin")) is_safetensors_compatible = any(file.endswith(".safetensors") for file in filenames) - variant = f".{variant}" if variant is not None else "" for pt_filename in pt_filenames: + _variant = f".{variant}" if (variant is not None and variant in pt_filename) else "" prefix, raw = os.path.split(pt_filename) - if raw == f"pytorch_model{variant}.bin": + if raw == f"pytorch_model{_variant}.bin": # transformers specific - sf_filename = os.path.join(prefix, 
f"model{variant}.safetensors") + sf_filename = os.path.join(prefix, f"model{_variant}.safetensors") else: sf_filename = pt_filename[: -len(".bin")] + ".safetensors" if is_safetensors_compatible and sf_filename not in filenames: @@ -136,12 +136,14 @@ def is_safetensors_compatible(filenames, variant=None) -> bool: def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], str]: - variant = variant or "" - filenames = set(sibling.rfilename for sibling in info.siblings) save_formats = ["bin", "safetensors", "msgpack", "onnx"] - variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats)) + if variant is not None: + variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats)) + else: + variant_filenames = set() + non_variant_filenames = set( f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats)) ) @@ -532,7 +534,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P if from_flax: ignore_patterns = ["*.bin", "*.safetensors", ".onnx"] - elif is_safetensors_available() and is_safetensors_compatible(model_filenames): + elif is_safetensors_available() and is_safetensors_compatible(model_filenames, variant=variant): ignore_patterns = ["*.bin", "*.msgpack"] else: ignore_patterns = ["*.safetensors", "*.msgpack"] diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 65f79dde26a7..9cbb14e80d5a 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -195,6 +195,128 @@ def test_cached_files_are_used_when_no_internet(self): if p1.data.ne(p2.data).sum() > 0: assert False, "Parameters not the same!" + def test_download_from_variant_folder(self): + for safe_avail in [False, True]: + import diffusers + + diffusers.utils.import_utils._safetensors_available = safe_avail + + other_format = ".bin" if safe_avail else ".safetensors" + with tempfile.TemporaryDirectory() as tmpdirname: + StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname + ) + all_root_files = [ + t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots")) + ] + files = [item for sublist in all_root_files for item in sublist] + + # None of the downloaded files should be a variant file even if we have some here: + # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet + assert len(files) == 15, f"We should only download 15 files, not {len(files)}" + assert not any(f.endswith(other_format) for f in files) + # no variants + assert not any(len(f.split(".")) == 3 for f in files) + + diffusers.utils.import_utils._safetensors_available = True + + def test_download_variant_all(self): + for safe_avail in [False, True]: + import diffusers + + diffusers.utils.import_utils._safetensors_available = safe_avail + + other_format = ".bin" if safe_avail else ".safetensors" + this_format = ".safetensors" if safe_avail else ".bin" + variant = "fp16" + + with tempfile.TemporaryDirectory() as tmpdirname: + StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname, variant=variant + ) + all_root_files = [ + t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots")) + ] + files = [item for sublist in all_root_files for item in sublist] + + # None of the downloaded files should be a non-variant file even if we have some here: + # 
https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet
+                assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
+                # unet, vae, text_encoder, safety_checker
+                assert len([f for f in files if f.endswith(f"{variant}{this_format}")]) == 4
+                # all checkpoints should have variant ending
+                assert not any(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files)
+                assert not any(f.endswith(other_format) for f in files)
+
+            diffusers.utils.import_utils._safetensors_available = True
+
+    def test_download_variant_partly(self):
+        for safe_avail in [False, True]:
+            import diffusers
+
+            diffusers.utils.import_utils._safetensors_available = safe_avail
+
+            other_format = ".bin" if safe_avail else ".safetensors"
+            this_format = ".safetensors" if safe_avail else ".bin"
+            variant = "no_ema"
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                StableDiffusionPipeline.from_pretrained(
+                    "hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname, variant=variant
+                )
+                snapshots = os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots")
+                all_root_files = [t[-1] for t in os.walk(snapshots)]
+                files = [item for sublist in all_root_files for item in sublist]
+
+                unet_files = os.listdir(os.path.join(snapshots, os.listdir(snapshots)[0], "unet"))
+
+                # Some of the downloaded files should be non-variant files, check:
+                # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet
+                assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
+                # only unet has "no_ema" variant
+                assert f"diffusion_pytorch_model.{variant}{this_format}" in unet_files
+                assert len([f for f in files if f.endswith(f"{variant}{this_format}")]) == 1
+                # vae, safety_checker and text_encoder should have no variant
+                assert sum(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files) == 3
+                assert not any(f.endswith(other_format) for f in files)
+
+            diffusers.utils.import_utils._safetensors_available = True
+
+    def test_download_broken_variant(self):
+        for safe_avail in [False, True]:
+            import diffusers
+
+            diffusers.utils.import_utils._safetensors_available = safe_avail
+            # the text encoder has neither non-variant nor "no_ema" variant weights, so the following can't work
+            for variant in [None, "no_ema"]:
+                with self.assertRaises(OSError) as error_context:
+                    with tempfile.TemporaryDirectory() as tmpdirname:
+                        StableDiffusionPipeline.from_pretrained(
+                            "hf-internal-testing/stable-diffusion-broken-variants",
+                            cache_dir=tmpdirname,
+                            variant=variant,
+                        )
+
+                assert "Error no file name" in str(error_context.exception)
+
+            # text encoder has fp16 variants so we can load it
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                pipe = StableDiffusionPipeline.from_pretrained(
+                    "hf-internal-testing/stable-diffusion-broken-variants", cache_dir=tmpdirname, variant="fp16"
+                )
+                assert pipe is not None
+
+                snapshots = os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots")
+                all_root_files = [t[-1] for t in os.walk(snapshots)]
+                files = [item for sublist in all_root_files for item in sublist]
+
+                # None of the downloaded files should be a non-variant file even if we have some here:
+                # https://huggingface.co/hf-internal-testing/stable-diffusion-broken-variants/tree/main/unet
+                assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
+
+            diffusers.utils.import_utils._safetensors_available = True
+
 class 
CustomPipelineTests(unittest.TestCase): def test_load_custom_pipeline(self): From e3299515c8434f0aa5d76d9a417af5b3db71373d Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 10 Feb 2023 17:12:08 +0200 Subject: [PATCH 08/27] up --- @ | 1017 ------------------------------------------------------------- 1 file changed, 1017 deletions(-) delete mode 100644 @ diff --git a/@ b/@ deleted file mode 100644 index 5c561ede3d02..000000000000 --- a/@ +++ /dev/null @@ -1,1017 +0,0 @@ -# coding=utf-8 -# Copyright 2022 HuggingFace Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import gc -import json -import os -import random -import shutil -import sys -import tempfile -import unittest -import unittest.mock as mock - -import numpy as np -import PIL -import safetensors.torch -import torch -from parameterized import parameterized -from PIL import Image -from requests.exceptions import HTTPError -from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer - -from diffusers import ( - AutoencoderKL, - DDIMPipeline, - DDIMScheduler, - DDPMPipeline, - DDPMScheduler, - DiffusionPipeline, - DPMSolverMultistepScheduler, - EulerAncestralDiscreteScheduler, - EulerDiscreteScheduler, - LMSDiscreteScheduler, - PNDMScheduler, - StableDiffusionImg2ImgPipeline, - StableDiffusionInpaintPipelineLegacy, - StableDiffusionPipeline, - UNet2DConditionModel, - UNet2DModel, - logging, -) -from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME -from diffusers.utils import CONFIG_NAME, WEIGHTS_NAME, floats_tensor, is_flax_available, nightly, slow, torch_device -from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir, require_torch_gpu - - -torch.backends.cuda.matmul.allow_tf32 = False - - -class DownloadTests(unittest.TestCase): - def test_download_only_pytorch(self): - with tempfile.TemporaryDirectory() as tmpdirname: - # pipeline has Flax weights - _ = DiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None, cache_dir=tmpdirname - ) - - all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] - files = [item for sublist in all_root_files for item in sublist] - - # None of the downloaded files should be a flax file even if we have some here: - # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_flax_model.msgpack - assert not any(f.endswith(".msgpack") for f in files) - # We need to never convert this tiny model to safetensors for this test to pass - assert not any(f.endswith(".safetensors") for f in files) - - def test_returned_cached_folder(self): - prompt = "hello" - pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None - ) - _, local_path = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None, return_cached_folder=True - ) - pipe_2 = 
StableDiffusionPipeline.from_pretrained(local_path) - - pipe = pipe.to(torch_device) - pipe_2 = pipe_2.to(torch_device) - - generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - generator = torch.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - assert np.max(np.abs(out - out_2)) < 1e-3 - - def test_download_safetensors(self): - with tempfile.TemporaryDirectory() as tmpdirname: - # pipeline has Flax weights - _ = DiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-pipe-safetensors", - safety_checker=None, - cache_dir=tmpdirname, - ) - - all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] - files = [item for sublist in all_root_files for item in sublist] - - # None of the downloaded files should be a pytorch file even if we have some here: - # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_flax_model.msgpack - assert not any(f.endswith(".bin") for f in files) - - def test_download_no_safety_checker(self): - prompt = "hello" - pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None - ) - pipe = pipe.to(torch_device) - generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - pipe_2 = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") - pipe_2 = pipe_2.to(torch_device) - generator = torch.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - assert np.max(np.abs(out - out_2)) < 1e-3 - - def test_load_no_safety_checker_explicit_locally(self): - prompt = "hello" - pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None - ) - pipe = pipe.to(torch_device) - generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - with tempfile.TemporaryDirectory() as tmpdirname: - pipe.save_pretrained(tmpdirname) - pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None) - pipe_2 = pipe_2.to(torch_device) - - generator = torch.manual_seed(0) - - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - assert np.max(np.abs(out - out_2)) < 1e-3 - - def test_load_no_safety_checker_default_locally(self): - prompt = "hello" - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") - pipe = pipe.to(torch_device) - - generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - with tempfile.TemporaryDirectory() as tmpdirname: - pipe.save_pretrained(tmpdirname) - pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname) - pipe_2 = pipe_2.to(torch_device) - - generator = torch.manual_seed(0) - - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images - - assert np.max(np.abs(out - out_2)) < 1e-3 - - def test_cached_files_are_used_when_no_internet(self): - # A mock response for an HTTP head request to emulate server down - response_mock = mock.Mock() - response_mock.status_code = 500 - response_mock.headers = {} - response_mock.raise_for_status.side_effect = HTTPError 
- response_mock.json.return_value = {} - - # Download this model to make sure it's in the cache. - orig_pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None - ) - orig_comps = {k: v for k, v in orig_pipe.components.items() if hasattr(v, "parameters")} - - # Under the mock environment we get a 500 error when trying to reach the model. - with mock.patch("requests.request", return_value=response_mock): - # Download this model to make sure it's in the cache. - pipe = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None, local_files_only=True - ) - comps = {k: v for k, v in pipe.components.items() if hasattr(v, "parameters")} - - for m1, m2 in zip(orig_comps.values(), comps.values()): - for p1, p2 in zip(m1.parameters(), m2.parameters()): - if p1.data.ne(p2.data).sum() > 0: - assert False, "Parameters not the same!" - - def test_download_from_variant_folder(self): - for safe_avail in [False, True]: - import diffusers - diffusers.utils.import_utils._safetensors_available = safe_avail - - other_format = ".bin" if safe_avail else ".safetensors" - with tempfile.TemporaryDirectory() as tmpdirname: - StableDiffusionPipeline.from_pretrained("hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname) - all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] - files = [item for sublist in all_root_files for item in sublist] - - # None of the downloaded files should be a variant file even if we have some here: - # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet - assert len(files) == 15, f"We should only download 15 files, not {len(files)}" - assert not any(f.endswith(other_format) for f in files) - # no variants - assert not any(len(f.split(".")) == 3 for f in files) - - diffusers.utils.import_utils._safetensors_available = True - - def test_download_variant_all(self): - for safe_avail in [False, True]: - import diffusers - diffusers.utils.import_utils._safetensors_available = safe_avail - - other_format = ".bin" if safe_avail else ".safetensors" - this_format = ".safetensors" if safe_avail else ".bin" - variant = "fp16" - - with tempfile.TemporaryDirectory() as tmpdirname: - StableDiffusionPipeline.from_pretrained("hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname, variant=variant) - all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] - files = [item for sublist in all_root_files for item in sublist] - - # None of the downloaded files should be a non-variant file even if we have some here: - # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet - import ipdb; ipdb.set_trace() - assert len(files) == 15, f"We should only download 15 files, not {len(files)}" - # unet, vae, text_encoder, safety_checker - assert len([f for f in files if f.endswith(f"{variant}{this_format}")]) == 4 - # all checkpoints should have variant ending - assert not any(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files) - assert not any(f.endswith(other_format) for f in files) - - diffusers.utils.import_utils._safetensors_available = True - - def test_download_variant_partly(self): - for safe_avail in [False, True]: - import diffusers - diffusers.utils.import_utils._safetensors_available = safe_avail - - other_format = ".bin" if safe_avail else 
".safetensors" - this_format = ".safetensors" if safe_avail else ".bin" - variant = "no_ema" - - with tempfile.TemporaryDirectory() as tmpdirname: - StableDiffusionPipeline.from_pretrained("hf-internal-testing/stable-diffusion-all-variants", cache_dir=tmpdirname, variant=variant) - snapshots = os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots") - all_root_files = [t[-1] for t in os.walk(snapshots)] - files = [item for sublist in all_root_files for item in sublist] - - - # None of the downloaded files should be a non-variant file even if we have some here: - # https://huggingface.co/hf-internal-testing/stable-diffusion-all-variants/tree/main/unet - import ipdb; ipdb.set_trace() - assert len(files) == 15, f"We should only download 15 files, not {len(files)}" - # only unet has "no_ema" variant - assert len([f for f in files if f.endswith(f"{variant}{this_format}")]) == 1 - # vae, safety_checker and text_encoder should have no variant - assert sum(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files) == 3 - assert not any(f.endswith(other_format) for f in files) - - diffusers.utils.import_utils._safetensors_available = True - - -class CustomPipelineTests(unittest.TestCase): - def test_load_custom_pipeline(self): - pipeline = DiffusionPipeline.from_pretrained( - "google/ddpm-cifar10-32", custom_pipeline="hf-internal-testing/diffusers-dummy-pipeline" - ) - pipeline = pipeline.to(torch_device) - # NOTE that `"CustomPipeline"` is not a class that is defined in this library, but solely on the Hub - # under https://huggingface.co/hf-internal-testing/diffusers-dummy-pipeline/blob/main/pipeline.py#L24 - assert pipeline.__class__.__name__ == "CustomPipeline" - - def test_load_custom_github(self): - pipeline = DiffusionPipeline.from_pretrained( - "google/ddpm-cifar10-32", custom_pipeline="one_step_unet", custom_revision="main" - ) - - # make sure that on "main" pipeline gives only ones because of: https://github.com/huggingface/diffusers/pull/1690 - with torch.no_grad(): - output = pipeline() - - assert output.numel() == output.sum() - - # hack since Python doesn't like overwriting modules: https://stackoverflow.com/questions/3105801/unload-a-module-in-python - # Could in the future work with hashes instead. 
- del sys.modules["diffusers_modules.git.one_step_unet"] - - pipeline = DiffusionPipeline.from_pretrained( - "google/ddpm-cifar10-32", custom_pipeline="one_step_unet", custom_revision="0.10.2" - ) - with torch.no_grad(): - output = pipeline() - - assert output.numel() != output.sum() - - assert pipeline.__class__.__name__ == "UnetSchedulerOneForwardPipeline" - - def test_run_custom_pipeline(self): - pipeline = DiffusionPipeline.from_pretrained( - "google/ddpm-cifar10-32", custom_pipeline="hf-internal-testing/diffusers-dummy-pipeline" - ) - pipeline = pipeline.to(torch_device) - images, output_str = pipeline(num_inference_steps=2, output_type="np") - - assert images[0].shape == (1, 32, 32, 3) - - # compare output to https://huggingface.co/hf-internal-testing/diffusers-dummy-pipeline/blob/main/pipeline.py#L102 - assert output_str == "This is a test" - - def test_local_custom_pipeline_repo(self): - local_custom_pipeline_path = get_tests_dir("fixtures/custom_pipeline") - pipeline = DiffusionPipeline.from_pretrained( - "google/ddpm-cifar10-32", custom_pipeline=local_custom_pipeline_path - ) - pipeline = pipeline.to(torch_device) - images, output_str = pipeline(num_inference_steps=2, output_type="np") - - assert pipeline.__class__.__name__ == "CustomLocalPipeline" - assert images[0].shape == (1, 32, 32, 3) - # compare to https://github.com/huggingface/diffusers/blob/main/tests/fixtures/custom_pipeline/pipeline.py#L102 - assert output_str == "This is a local test" - - def test_local_custom_pipeline_file(self): - local_custom_pipeline_path = get_tests_dir("fixtures/custom_pipeline") - local_custom_pipeline_path = os.path.join(local_custom_pipeline_path, "what_ever.py") - pipeline = DiffusionPipeline.from_pretrained( - "google/ddpm-cifar10-32", custom_pipeline=local_custom_pipeline_path - ) - pipeline = pipeline.to(torch_device) - images, output_str = pipeline(num_inference_steps=2, output_type="np") - - assert pipeline.__class__.__name__ == "CustomLocalPipeline" - assert images[0].shape == (1, 32, 32, 3) - # compare to https://github.com/huggingface/diffusers/blob/main/tests/fixtures/custom_pipeline/pipeline.py#L102 - assert output_str == "This is a local test" - - @slow - @require_torch_gpu - def test_load_pipeline_from_git(self): - clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" - - feature_extractor = CLIPFeatureExtractor.from_pretrained(clip_model_id) - clip_model = CLIPModel.from_pretrained(clip_model_id, torch_dtype=torch.float16) - - pipeline = DiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", - custom_pipeline="clip_guided_stable_diffusion", - clip_model=clip_model, - feature_extractor=feature_extractor, - torch_dtype=torch.float16, - ) - pipeline.enable_attention_slicing() - pipeline = pipeline.to(torch_device) - - # NOTE that `"CLIPGuidedStableDiffusion"` is not a class that is defined in the pypi package of th e library, but solely on the community examples folder of GitHub under: - # https://github.com/huggingface/diffusers/blob/main/examples/community/clip_guided_stable_diffusion.py - assert pipeline.__class__.__name__ == "CLIPGuidedStableDiffusion" - - image = pipeline("a prompt", num_inference_steps=2, output_type="np").images[0] - assert image.shape == (512, 512, 3) - - -class PipelineFastTests(unittest.TestCase): - def tearDown(self): - # clean up the VRAM after each test - super().tearDown() - gc.collect() - torch.cuda.empty_cache() - - import diffusers - - diffusers.utils.import_utils._safetensors_available = True - - def 
dummy_image(self): - batch_size = 1 - num_channels = 3 - sizes = (32, 32) - - image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device) - return image - - def dummy_uncond_unet(self, sample_size=32): - torch.manual_seed(0) - model = UNet2DModel( - block_out_channels=(32, 64), - layers_per_block=2, - sample_size=sample_size, - in_channels=3, - out_channels=3, - down_block_types=("DownBlock2D", "AttnDownBlock2D"), - up_block_types=("AttnUpBlock2D", "UpBlock2D"), - ) - return model - - def dummy_cond_unet(self, sample_size=32): - torch.manual_seed(0) - model = UNet2DConditionModel( - block_out_channels=(32, 64), - layers_per_block=2, - sample_size=sample_size, - in_channels=4, - out_channels=4, - down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), - up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), - cross_attention_dim=32, - ) - return model - - @property - def dummy_vae(self): - torch.manual_seed(0) - model = AutoencoderKL( - block_out_channels=[32, 64], - in_channels=3, - out_channels=3, - down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], - up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], - latent_channels=4, - ) - return model - - @property - def dummy_text_encoder(self): - torch.manual_seed(0) - config = CLIPTextConfig( - bos_token_id=0, - eos_token_id=2, - hidden_size=32, - intermediate_size=37, - layer_norm_eps=1e-05, - num_attention_heads=4, - num_hidden_layers=5, - pad_token_id=1, - vocab_size=1000, - ) - return CLIPTextModel(config) - - @property - def dummy_extractor(self): - def extract(*args, **kwargs): - class Out: - def __init__(self): - self.pixel_values = torch.ones([0]) - - def to(self, device): - self.pixel_values.to(device) - return self - - return Out() - - return extract - - @parameterized.expand( - [ - [DDIMScheduler, DDIMPipeline, 32], - [DDPMScheduler, DDPMPipeline, 32], - [DDIMScheduler, DDIMPipeline, (32, 64)], - [DDPMScheduler, DDPMPipeline, (64, 32)], - ] - ) - def test_uncond_unet_components(self, scheduler_fn=DDPMScheduler, pipeline_fn=DDPMPipeline, sample_size=32): - unet = self.dummy_uncond_unet(sample_size) - scheduler = scheduler_fn() - pipeline = pipeline_fn(unet, scheduler).to(torch_device) - - generator = torch.manual_seed(0) - out_image = pipeline( - generator=generator, - num_inference_steps=2, - output_type="np", - ).images - sample_size = (sample_size, sample_size) if isinstance(sample_size, int) else sample_size - assert out_image.shape == (1, *sample_size, 3) - - def test_stable_diffusion_components(self): - """Test that components property works correctly""" - unet = self.dummy_cond_unet() - scheduler = PNDMScheduler(skip_prk_steps=True) - vae = self.dummy_vae - bert = self.dummy_text_encoder - tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - - image = self.dummy_image().cpu().permute(0, 2, 3, 1)[0] - init_image = Image.fromarray(np.uint8(image)).convert("RGB") - mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((32, 32)) - - # make sure here that pndm scheduler skips prk - inpaint = StableDiffusionInpaintPipelineLegacy( - unet=unet, - scheduler=scheduler, - vae=vae, - text_encoder=bert, - tokenizer=tokenizer, - safety_checker=None, - feature_extractor=self.dummy_extractor, - ).to(torch_device) - img2img = StableDiffusionImg2ImgPipeline(**inpaint.components).to(torch_device) - text2img = StableDiffusionPipeline(**inpaint.components).to(torch_device) - - prompt = "A painting of a squirrel eating a burger" - - generator = 
torch.manual_seed(0) - image_inpaint = inpaint( - [prompt], - generator=generator, - num_inference_steps=2, - output_type="np", - image=init_image, - mask_image=mask_image, - ).images - image_img2img = img2img( - [prompt], - generator=generator, - num_inference_steps=2, - output_type="np", - image=init_image, - ).images - image_text2img = text2img( - [prompt], - generator=generator, - num_inference_steps=2, - output_type="np", - ).images - - assert image_inpaint.shape == (1, 32, 32, 3) - assert image_img2img.shape == (1, 32, 32, 3) - assert image_text2img.shape == (1, 64, 64, 3) - - def test_set_scheduler(self): - unet = self.dummy_cond_unet() - scheduler = PNDMScheduler(skip_prk_steps=True) - vae = self.dummy_vae - bert = self.dummy_text_encoder - tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - - sd = StableDiffusionPipeline( - unet=unet, - scheduler=scheduler, - vae=vae, - text_encoder=bert, - tokenizer=tokenizer, - safety_checker=None, - feature_extractor=self.dummy_extractor, - ) - - sd.scheduler = DDIMScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, DDIMScheduler) - sd.scheduler = DDPMScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, DDPMScheduler) - sd.scheduler = PNDMScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, PNDMScheduler) - sd.scheduler = LMSDiscreteScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, LMSDiscreteScheduler) - sd.scheduler = EulerDiscreteScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, EulerDiscreteScheduler) - sd.scheduler = EulerAncestralDiscreteScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, EulerAncestralDiscreteScheduler) - sd.scheduler = DPMSolverMultistepScheduler.from_config(sd.scheduler.config) - assert isinstance(sd.scheduler, DPMSolverMultistepScheduler) - - def test_set_scheduler_consistency(self): - unet = self.dummy_cond_unet() - pndm = PNDMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler") - ddim = DDIMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler") - vae = self.dummy_vae - bert = self.dummy_text_encoder - tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - - sd = StableDiffusionPipeline( - unet=unet, - scheduler=pndm, - vae=vae, - text_encoder=bert, - tokenizer=tokenizer, - safety_checker=None, - feature_extractor=self.dummy_extractor, - ) - - pndm_config = sd.scheduler.config - sd.scheduler = DDPMScheduler.from_config(pndm_config) - sd.scheduler = PNDMScheduler.from_config(sd.scheduler.config) - pndm_config_2 = sd.scheduler.config - pndm_config_2 = {k: v for k, v in pndm_config_2.items() if k in pndm_config} - - assert dict(pndm_config) == dict(pndm_config_2) - - sd = StableDiffusionPipeline( - unet=unet, - scheduler=ddim, - vae=vae, - text_encoder=bert, - tokenizer=tokenizer, - safety_checker=None, - feature_extractor=self.dummy_extractor, - ) - - ddim_config = sd.scheduler.config - sd.scheduler = LMSDiscreteScheduler.from_config(ddim_config) - sd.scheduler = DDIMScheduler.from_config(sd.scheduler.config) - ddim_config_2 = sd.scheduler.config - ddim_config_2 = {k: v for k, v in ddim_config_2.items() if k in ddim_config} - - assert dict(ddim_config) == dict(ddim_config_2) - - def test_save_safe_serialization(self): - pipeline = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") - with 
tempfile.TemporaryDirectory() as tmpdirname: - pipeline.save_pretrained(tmpdirname, safe_serialization=True) - - # Validate that the VAE safetensor exists and are of the correct format - vae_path = os.path.join(tmpdirname, "vae", "diffusion_pytorch_model.safetensors") - assert os.path.exists(vae_path), f"Could not find {vae_path}" - _ = safetensors.torch.load_file(vae_path) - - # Validate that the UNet safetensor exists and are of the correct format - unet_path = os.path.join(tmpdirname, "unet", "diffusion_pytorch_model.safetensors") - assert os.path.exists(unet_path), f"Could not find {unet_path}" - _ = safetensors.torch.load_file(unet_path) - - # Validate that the text encoder safetensor exists and are of the correct format - text_encoder_path = os.path.join(tmpdirname, "text_encoder", "model.safetensors") - assert os.path.exists(text_encoder_path), f"Could not find {text_encoder_path}" - _ = safetensors.torch.load_file(text_encoder_path) - - pipeline = StableDiffusionPipeline.from_pretrained(tmpdirname) - assert pipeline.unet is not None - assert pipeline.vae is not None - assert pipeline.text_encoder is not None - assert pipeline.scheduler is not None - assert pipeline.feature_extractor is not None - - def test_no_pytorch_download_when_doing_safetensors(self): - # by default we don't download - with tempfile.TemporaryDirectory() as tmpdirname: - _ = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/diffusers-stable-diffusion-tiny-all", cache_dir=tmpdirname - ) - - path = os.path.join( - tmpdirname, - "models--hf-internal-testing--diffusers-stable-diffusion-tiny-all", - "snapshots", - "07838d72e12f9bcec1375b0482b80c1d399be843", - "unet", - ) - # safetensors exists - assert os.path.exists(os.path.join(path, "diffusion_pytorch_model.safetensors")) - # pytorch does not - assert not os.path.exists(os.path.join(path, "diffusion_pytorch_model.bin")) - - def test_no_safetensors_download_when_doing_pytorch(self): - # mock diffusers safetensors not available - import diffusers - - diffusers.utils.import_utils._safetensors_available = False - - with tempfile.TemporaryDirectory() as tmpdirname: - _ = StableDiffusionPipeline.from_pretrained( - "hf-internal-testing/diffusers-stable-diffusion-tiny-all", cache_dir=tmpdirname - ) - - path = os.path.join( - tmpdirname, - "models--hf-internal-testing--diffusers-stable-diffusion-tiny-all", - "snapshots", - "07838d72e12f9bcec1375b0482b80c1d399be843", - "unet", - ) - # safetensors does not exists - assert not os.path.exists(os.path.join(path, "diffusion_pytorch_model.safetensors")) - # pytorch does - assert os.path.exists(os.path.join(path, "diffusion_pytorch_model.bin")) - - diffusers.utils.import_utils._safetensors_available = True - - def test_optional_components(self): - unet = self.dummy_cond_unet() - pndm = PNDMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler") - vae = self.dummy_vae - bert = self.dummy_text_encoder - tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - - orig_sd = StableDiffusionPipeline( - unet=unet, - scheduler=pndm, - vae=vae, - text_encoder=bert, - tokenizer=tokenizer, - safety_checker=unet, - feature_extractor=self.dummy_extractor, - ) - sd = orig_sd - - assert sd.config.requires_safety_checker is True - - with tempfile.TemporaryDirectory() as tmpdirname: - sd.save_pretrained(tmpdirname) - - # Test that passing None works - sd = StableDiffusionPipeline.from_pretrained( - tmpdirname, feature_extractor=None, safety_checker=None, 
requires_safety_checker=False - ) - - assert sd.config.requires_safety_checker is False - assert sd.config.safety_checker == (None, None) - assert sd.config.feature_extractor == (None, None) - - with tempfile.TemporaryDirectory() as tmpdirname: - sd.save_pretrained(tmpdirname) - - # Test that loading previous None works - sd = StableDiffusionPipeline.from_pretrained(tmpdirname) - - assert sd.config.requires_safety_checker is False - assert sd.config.safety_checker == (None, None) - assert sd.config.feature_extractor == (None, None) - - orig_sd.save_pretrained(tmpdirname) - - # Test that loading without any directory works - shutil.rmtree(os.path.join(tmpdirname, "safety_checker")) - with open(os.path.join(tmpdirname, sd.config_name)) as f: - config = json.load(f) - config["safety_checker"] = [None, None] - with open(os.path.join(tmpdirname, sd.config_name), "w") as f: - json.dump(config, f) - - sd = StableDiffusionPipeline.from_pretrained(tmpdirname, requires_safety_checker=False) - sd.save_pretrained(tmpdirname) - sd = StableDiffusionPipeline.from_pretrained(tmpdirname) - - assert sd.config.requires_safety_checker is False - assert sd.config.safety_checker == (None, None) - assert sd.config.feature_extractor == (None, None) - - # Test that loading from deleted model index works - with open(os.path.join(tmpdirname, sd.config_name)) as f: - config = json.load(f) - del config["safety_checker"] - del config["feature_extractor"] - with open(os.path.join(tmpdirname, sd.config_name), "w") as f: - json.dump(config, f) - - sd = StableDiffusionPipeline.from_pretrained(tmpdirname) - - assert sd.config.requires_safety_checker is False - assert sd.config.safety_checker == (None, None) - assert sd.config.feature_extractor == (None, None) - - with tempfile.TemporaryDirectory() as tmpdirname: - sd.save_pretrained(tmpdirname) - - # Test that partially loading works - sd = StableDiffusionPipeline.from_pretrained(tmpdirname, feature_extractor=self.dummy_extractor) - - assert sd.config.requires_safety_checker is False - assert sd.config.safety_checker == (None, None) - assert sd.config.feature_extractor != (None, None) - - # Test that partially loading works - sd = StableDiffusionPipeline.from_pretrained( - tmpdirname, - feature_extractor=self.dummy_extractor, - safety_checker=unet, - requires_safety_checker=[True, True], - ) - - assert sd.config.requires_safety_checker == [True, True] - assert sd.config.safety_checker != (None, None) - assert sd.config.feature_extractor != (None, None) - - with tempfile.TemporaryDirectory() as tmpdirname: - sd.save_pretrained(tmpdirname) - sd = StableDiffusionPipeline.from_pretrained(tmpdirname, feature_extractor=self.dummy_extractor) - - assert sd.config.requires_safety_checker == [True, True] - assert sd.config.safety_checker != (None, None) - assert sd.config.feature_extractor != (None, None) - - -@slow -@require_torch_gpu -class PipelineSlowTests(unittest.TestCase): - def tearDown(self): - # clean up the VRAM after each test - super().tearDown() - gc.collect() - torch.cuda.empty_cache() - - def test_smart_download(self): - model_id = "hf-internal-testing/unet-pipeline-dummy" - with tempfile.TemporaryDirectory() as tmpdirname: - _ = DiffusionPipeline.from_pretrained(model_id, cache_dir=tmpdirname, force_download=True) - local_repo_name = "--".join(["models"] + model_id.split("/")) - snapshot_dir = os.path.join(tmpdirname, local_repo_name, "snapshots") - snapshot_dir = os.path.join(snapshot_dir, os.listdir(snapshot_dir)[0]) - - # inspect all downloaded files to make sure 
that everything is included
- assert os.path.isfile(os.path.join(snapshot_dir, DiffusionPipeline.config_name))
- assert os.path.isfile(os.path.join(snapshot_dir, CONFIG_NAME))
- assert os.path.isfile(os.path.join(snapshot_dir, SCHEDULER_CONFIG_NAME))
- assert os.path.isfile(os.path.join(snapshot_dir, WEIGHTS_NAME))
- assert os.path.isfile(os.path.join(snapshot_dir, "scheduler", SCHEDULER_CONFIG_NAME))
- assert os.path.isfile(os.path.join(snapshot_dir, "unet", WEIGHTS_NAME))
- # let's make sure the super large numpy file:
- # https://huggingface.co/hf-internal-testing/unet-pipeline-dummy/blob/main/big_array.npy
- # is not downloaded, while all the expected files are
- assert not os.path.isfile(os.path.join(snapshot_dir, "big_array.npy"))
-
- def test_warning_unused_kwargs(self):
- model_id = "hf-internal-testing/unet-pipeline-dummy"
- logger = logging.get_logger("diffusers.pipelines")
- with tempfile.TemporaryDirectory() as tmpdirname:
- with CaptureLogger(logger) as cap_logger:
- DiffusionPipeline.from_pretrained(
- model_id,
- not_used=True,
- cache_dir=tmpdirname,
- force_download=True,
- )
-
- assert (
- cap_logger.out
- == "Keyword arguments {'not_used': True} are not expected by DDPMPipeline and will be ignored.\n"
- )
-
- def test_from_save_pretrained(self):
- # 1. Load models
- model = UNet2DModel(
- block_out_channels=(32, 64),
- layers_per_block=2,
- sample_size=32,
- in_channels=3,
- out_channels=3,
- down_block_types=("DownBlock2D", "AttnDownBlock2D"),
- up_block_types=("AttnUpBlock2D", "UpBlock2D"),
- )
- scheduler = DDPMScheduler(num_train_timesteps=10)
-
- ddpm = DDPMPipeline(model, scheduler)
- ddpm.to(torch_device)
- ddpm.set_progress_bar_config(disable=None)
-
- with tempfile.TemporaryDirectory() as tmpdirname:
- ddpm.save_pretrained(tmpdirname)
- new_ddpm = DDPMPipeline.from_pretrained(tmpdirname)
- new_ddpm.to(torch_device)
-
- generator = torch.Generator(device=torch_device).manual_seed(0)
- image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images
-
- generator = torch.Generator(device=torch_device).manual_seed(0)
- new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images
-
- assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
-
- def test_from_pretrained_hub(self):
- model_path = "google/ddpm-cifar10-32"
-
- scheduler = DDPMScheduler(num_train_timesteps=10)
-
- ddpm = DDPMPipeline.from_pretrained(model_path, scheduler=scheduler)
- ddpm = ddpm.to(torch_device)
- ddpm.set_progress_bar_config(disable=None)
-
- ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler)
- ddpm_from_hub = ddpm_from_hub.to(torch_device)
- ddpm_from_hub.set_progress_bar_config(disable=None)
-
- generator = torch.Generator(device=torch_device).manual_seed(0)
- image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images
-
- generator = torch.Generator(device=torch_device).manual_seed(0)
- new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images
-
- assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
-
- def test_from_pretrained_hub_pass_model(self):
- model_path = "google/ddpm-cifar10-32"
-
- scheduler = DDPMScheduler(num_train_timesteps=10)
-
- # pass unet into DiffusionPipeline
- unet = UNet2DModel.from_pretrained(model_path)
- ddpm_from_hub_custom_model = DiffusionPipeline.from_pretrained(model_path,
unet=unet, scheduler=scheduler)
- ddpm_from_hub_custom_model = ddpm_from_hub_custom_model.to(torch_device)
- ddpm_from_hub_custom_model.set_progress_bar_config(disable=None)
-
- ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler)
- ddpm_from_hub = ddpm_from_hub.to(torch_device)
- ddpm_from_hub.set_progress_bar_config(disable=None)
-
- generator = torch.Generator(device=torch_device).manual_seed(0)
- image = ddpm_from_hub_custom_model(generator=generator, num_inference_steps=5, output_type="numpy").images
-
- generator = torch.Generator(device=torch_device).manual_seed(0)
- new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images
-
- assert np.abs(image - new_image).sum() < 1e-5, "Models don't give the same forward pass"
-
- def test_output_format(self):
- model_path = "google/ddpm-cifar10-32"
-
- scheduler = DDIMScheduler.from_pretrained(model_path)
- pipe = DDIMPipeline.from_pretrained(model_path, scheduler=scheduler)
- pipe.to(torch_device)
- pipe.set_progress_bar_config(disable=None)
-
- images = pipe(output_type="numpy").images
- assert images.shape == (1, 32, 32, 3)
- assert isinstance(images, np.ndarray)
-
- images = pipe(output_type="pil", num_inference_steps=4).images
- assert isinstance(images, list)
- assert len(images) == 1
- assert isinstance(images[0], PIL.Image.Image)
-
- # PIL is used by default
- images = pipe(num_inference_steps=4).images
- assert isinstance(images, list)
- assert isinstance(images[0], PIL.Image.Image)
-
- def test_from_flax_from_pt(self):
- pipe_pt = StableDiffusionPipeline.from_pretrained(
- "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None
- )
- pipe_pt.to(torch_device)
-
- if not is_flax_available():
- raise ImportError("Make sure flax is installed.")
-
- from diffusers import FlaxStableDiffusionPipeline
-
- with tempfile.TemporaryDirectory() as tmpdirname:
- pipe_pt.save_pretrained(tmpdirname)
-
- pipe_flax, params = FlaxStableDiffusionPipeline.from_pretrained(
- tmpdirname, safety_checker=None, from_pt=True
- )
-
- with tempfile.TemporaryDirectory() as tmpdirname:
- pipe_flax.save_pretrained(tmpdirname, params=params)
- pipe_pt_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None, from_flax=True)
- pipe_pt_2.to(torch_device)
-
- prompt = "Hello"
-
- generator = torch.manual_seed(0)
- image_0 = pipe_pt(
- [prompt],
- generator=generator,
- num_inference_steps=2,
- output_type="np",
- ).images[0]
-
- generator = torch.manual_seed(0)
- image_1 = pipe_pt_2(
- [prompt],
- generator=generator,
- num_inference_steps=2,
- output_type="np",
- ).images[0]
-
- assert np.abs(image_0 - image_1).sum() < 1e-5, "Models don't give the same forward pass"
-
-
-@nightly
-@require_torch_gpu
-class PipelineNightlyTests(unittest.TestCase):
- def tearDown(self):
- # clean up the VRAM after each test
- super().tearDown()
- gc.collect()
- torch.cuda.empty_cache()
-
- def test_ddpm_ddim_equality_batched(self):
- seed = 0
- model_id = "google/ddpm-cifar10-32"
-
- unet = UNet2DModel.from_pretrained(model_id)
- ddpm_scheduler = DDPMScheduler()
- ddim_scheduler = DDIMScheduler()
-
- ddpm = DDPMPipeline(unet=unet, scheduler=ddpm_scheduler)
- ddpm.to(torch_device)
- ddpm.set_progress_bar_config(disable=None)
-
- ddim = DDIMPipeline(unet=unet, scheduler=ddim_scheduler)
- ddim.to(torch_device)
- ddim.set_progress_bar_config(disable=None)
-
- generator = torch.Generator(device=torch_device).manual_seed(seed)
- ddpm_images = ddpm(batch_size=2, generator=generator,
output_type="numpy").images - - generator = torch.Generator(device=torch_device).manual_seed(seed) - ddim_images = ddim( - batch_size=2, - generator=generator, - num_inference_steps=1000, - eta=1.0, - output_type="numpy", - use_clipped_model_output=True, # Need this to make DDIM match DDPM - ).images - - # the values aren't exactly equal, but the images look the same visually - assert np.abs(ddpm_images - ddim_images).max() < 1e-1 From 710480d71d809477675659b1c656202318704d1a Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 13 Feb 2023 17:11:50 +0000 Subject: [PATCH 09/27] use path splittetx --- src/diffusers/models/modeling_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 41445bb4b35a..594f0105a40f 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -143,7 +143,7 @@ def load(module: torch.nn.Module, prefix=""): def _add_variant(weights_name: str, variant: Optional[str] = None) -> str: if variant is not None: - splits = weights_name.split(".") + splits = os.path.splittext(weights_name) splits = splits[:-1] + [variant] + splits[-1:] weights_name = ".".join(splits) From 9262bbfe5b21c06a143c8644c11c3322b122e15e Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 13 Feb 2023 18:36:03 +0000 Subject: [PATCH 10/27] add deprecate --- src/diffusers/models/modeling_utils.py | 53 +++++++++++++++++++------- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/constants.py | 1 + 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 594f0105a40f..fafc6f6c5de0 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -15,7 +15,9 @@ # limitations under the License. import inspect +import warnings import os +import version from functools import partial from typing import Callable, List, Optional, Tuple, Union @@ -33,6 +35,7 @@ HF_HUB_OFFLINE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, SAFETENSORS_WEIGHTS_NAME, + DEPRECATED_REVISION_ARGS, WEIGHTS_NAME, is_accelerate_available, is_safetensors_available, @@ -818,20 +821,42 @@ def _get_model_file( ) else: try: - # Load from URL or cache if already cached - model_file = hf_hub_download( - pretrained_model_name_or_path, - filename=weights_name, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - resume_download=resume_download, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - user_agent=user_agent, - subfolder=subfolder, - revision=revision, - ) + if revision in DEPRECATED_REVISION_ARGS and version.parse(version.parse(__version__).base_version) >= version.parse("0.16.0"): + variant = _add_variant(weights_name, revision) + + try: + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=weights_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision, + ) + warnings.warn(f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'` even though you can load it via `variant=`{variant}`. Loading model variants via `revision='{variant}'` is deprecated and will be removed in diffusers v1. Please use `variant='{variant}'` instead. 
For more information, please have a look at: ", FutureWarning) + except: + warnings.warn(f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'`. This behavior is deprecated and will be removed in diffusers v1. Instead one should use `variant='{variant}'` instead, but it appears that {pretrained_model_name_or_path} currently does not have a {_add_variant(weights_name)} file, which is a in {pretrained_model_name_or_path}. \n\n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title {pretrained_model_name_or_path} is missing {_add_variant(weights_name)} so that the correct variant file can be added.") + model_file = None + else: + # Load from URL or cache if already cached + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=weights_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision, + ) return model_file except RepositoryNotFoundError: diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 8e61b5757eb5..643663405d04 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -29,6 +29,7 @@ ONNX_WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME, + DEPRECATED_REVISION_ARGS, ) from .deprecation_utils import deprecate from .doc_utils import replace_example_docstring diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index 0edb4c57f076..a4a25fedba79 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -30,3 +30,4 @@ DIFFUSERS_CACHE = default_cache_path DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) +DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"] From 4a0ff60eb962a8b23e97a177a1b29ea4b3f14c27 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 13 Feb 2023 19:27:44 +0000 Subject: [PATCH 11/27] deprecation warnings --- src/diffusers/models/modeling_utils.py | 24 ++++++++---- src/diffusers/pipelines/pipeline_utils.py | 46 +++++++++++++++++++++-- src/diffusers/utils/__init__.py | 2 +- 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index fafc6f6c5de0..61ce4e3a8c64 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -15,27 +15,27 @@ # limitations under the License. import inspect -import warnings import os -import version +import warnings from functools import partial from typing import Callable, List, Optional, Tuple, Union import torch from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError +from packaging import version from requests import HTTPError from torch import Tensor, device from .. 
import __version__ from ..utils import ( CONFIG_NAME, + DEPRECATED_REVISION_ARGS, DIFFUSERS_CACHE, FLAX_WEIGHTS_NAME, HF_HUB_OFFLINE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, SAFETENSORS_WEIGHTS_NAME, - DEPRECATED_REVISION_ARGS, WEIGHTS_NAME, is_accelerate_available, is_safetensors_available, @@ -146,7 +146,7 @@ def load(module: torch.nn.Module, prefix=""): def _add_variant(weights_name: str, variant: Optional[str] = None) -> str: if variant is not None: - splits = os.path.splittext(weights_name) + splits = os.path.splitext(weights_name) splits = splits[:-1] + [variant] + splits[-1:] weights_name = ".".join(splits) @@ -821,7 +821,9 @@ def _get_model_file( ) else: try: - if revision in DEPRECATED_REVISION_ARGS and version.parse(version.parse(__version__).base_version) >= version.parse("0.16.0"): + if revision in DEPRECATED_REVISION_ARGS and version.parse( + version.parse(__version__).base_version + ) >= version.parse("0.16.0"): variant = _add_variant(weights_name, revision) try: @@ -838,9 +840,15 @@ def _get_model_file( subfolder=subfolder, revision=revision, ) - warnings.warn(f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'` even though you can load it via `variant=`{variant}`. Loading model variants via `revision='{variant}'` is deprecated and will be removed in diffusers v1. Please use `variant='{variant}'` instead. For more information, please have a look at: ", FutureWarning) - except: - warnings.warn(f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'`. This behavior is deprecated and will be removed in diffusers v1. Instead one should use `variant='{variant}'` instead, but it appears that {pretrained_model_name_or_path} currently does not have a {_add_variant(weights_name)} file, which is a in {pretrained_model_name_or_path}. \n\n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title {pretrained_model_name_or_path} is missing {_add_variant(weights_name)} so that the correct variant file can be added.") + warnings.warn( + f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'` even though you can load it via `variant=`{variant}`. Loading model variants via `revision='{variant}'` is deprecated and will be removed in diffusers v1. Please use `variant='{variant}'` instead. For more information, please have a look at: ", + FutureWarning, + ) + except: # noqa: E722 + warnings.warn( + f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'`. This behavior is deprecated and will be removed in diffusers v1. One should use `variant='{variant}'` instead. However, it appears that {pretrained_model_name_or_path} currently does not have a {_add_variant(weights_name)} file in the 'main' branch of {pretrained_model_name_or_path}. 
\n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title '{pretrained_model_name_or_path} is missing {_add_variant(weights_name)}' so that the correct variant file can be added.", + FutureWarning, + ) model_file = None else: # Load from URL or cache if already cached diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 8c710be398f3..60f48376b386 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -17,6 +17,7 @@ import importlib import inspect import os +import warnings from dataclasses import dataclass from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Union @@ -31,11 +32,13 @@ import diffusers +from .. import __version__ from ..configuration_utils import ConfigMixin from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from ..utils import ( CONFIG_NAME, + DEPRECATED_REVISION_ARGS, DIFFUSERS_CACHE, HF_HUB_OFFLINE, BaseOutput, @@ -516,6 +519,30 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P model_filenames = variant_compatible_siblings(info, variant=variant) model_folder_names = set([os.path.split(f)[0] for f in model_filenames]) + if revision in DEPRECATED_REVISION_ARGS and version.parse( + version.parse(__version__).base_version + ) >= version.parse("0.10.0"): + info = model_info( + pretrained_model_name_or_path, + use_auth_token=use_auth_token, + revision=None, + ) + comp_model_filenames = variant_compatible_siblings(info, variant=revision) + comp_model_filenames = [ + ".".join(os.path.splitext(f)[:1] + os.path.splitext(f)[2:]) for f in comp_model_filenames + ] + + if set(comp_model_filenames) == set(model_filenames): + warnings.warn( + f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'` even though you can load it via `variant=`{revision}`. Loading model variants via `revision='{variant}'` is deprecated and will be removed in diffusers v1. Please use `variant='{revision}'` instead. For more information, please have a look at: ", + FutureWarning, + ) + else: + warnings.warn( + f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'`. This behavior is deprecated and will be removed in diffusers v1. One should use `variant='{revision}'` instead. However, it appears that {pretrained_model_name_or_path} currently does not have the required variant filenames in the 'main' branch. \n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title '{pretrained_model_name_or_path} is missing {revision} files' so that the correct variant file can be added.", + FutureWarning, + ) + # all filenames compatible with variant will be added allow_patterns = list(model_filenames) @@ -580,9 +607,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P if variant_exists: model_variants[folder] = variant - # TODO(PVP) - delete if not needed anymore - print(os.system(f"cd {cached_folder} && tree")) - # 2. 
Load the pipeline class, if using custom module then load it from the hub # if we load from explicit class, let's use it if custom_pipeline is not None: @@ -767,10 +791,11 @@ def load_module(name, value): loading_kwargs["sess_options"] = sess_options is_diffusers_model = issubclass(class_obj, diffusers.ModelMixin) + transformers_version = version.parse(version.parse(transformers.__version__).base_version) is_transformers_model = ( is_transformers_available() and issubclass(class_obj, PreTrainedModel) - and version.parse(version.parse(transformers.__version__).base_version) >= version.parse("4.20.0") + and transformers_version >= version.parse("4.20.0") ) # When loading a transformers model, if the device_map is None, the weights will be initialized as opposed to diffusers. @@ -782,6 +807,19 @@ def load_module(name, value): if from_flax: loading_kwargs["from_flax"] = True + # the following can be deleted once the minimum required `transformers` version + # is higher than 4.27 + if ( + is_transformers_model + and loading_kwargs["variant"] is not None + and transformers_version < version.parse("4.27.0") + ): + raise ImportError( + f"When passing `variant='{variant}'`, please make sure to upgrade your `transformers` version to at least 4.27.0.dev0" + ) + elif is_transformers_model and loading_kwargs["variant"] is None: + loading_kwargs.pop("variant") + # if `from_flax` and model is transformer model, can currently not load with `low_cpu_mem_usage` if not (from_flax and is_transformers_model): loading_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 643663405d04..bebb74cdd73d 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -20,6 +20,7 @@ from .. import __version__ from .constants import ( CONFIG_NAME, + DEPRECATED_REVISION_ARGS, DIFFUSERS_CACHE, DIFFUSERS_DYNAMIC_MODULE_NAME, FLAX_WEIGHTS_NAME, @@ -29,7 +30,6 @@ ONNX_WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME, - DEPRECATED_REVISION_ARGS, ) from .deprecation_utils import deprecate from .doc_utils import replace_example_docstring From 010f2ed1348ef126036482bb13ad0abe06f4c915 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 12:21:24 +0200 Subject: [PATCH 12/27] improve docs --- docs/source/en/using-diffusers/loading.mdx | 196 +++++++++++++++------ src/diffusers/models/modeling_utils.py | 2 +- 2 files changed, 145 insertions(+), 53 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 97bb7a0d037a..52200096f501 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -23,31 +23,50 @@ In the following we explain in-detail how to easily load: ## Loading pipelines -The [`DiffusionPipeline`] class is the easiest way to access any diffusion model that is [available on the Hub](https://huggingface.co/models?library=diffusers). Let's look at an example on how to download [CompVis' Latent Diffusion model](https://huggingface.co/CompVis/ldm-text2im-large-256). +The [`DiffusionPipeline`] class is the easiest way to access any diffusion model that is [available on the Hub](https://huggingface.co/models?library=diffusers). Let's look at an example on how to download [Runway's Stable Diffusion model](https://huggingface.co/runwayml/stable-diffusion-v1-5). 
```python
from diffusers import DiffusionPipeline

-repo_id = "CompVis/ldm-text2im-large-256"
-ldm = DiffusionPipeline.from_pretrained(repo_id)
+repo_id = "runwayml/stable-diffusion-v1-5"
+pipe = DiffusionPipeline.from_pretrained(repo_id)
```

-Here [`DiffusionPipeline`] automatically detects the correct pipeline (*i.e.* [`LDMTextToImagePipeline`]), downloads and caches all required configuration and weight files (if not already done so), and finally returns a pipeline instance, called `ldm`.
-The pipeline instance can then be called using [`LDMTextToImagePipeline.__call__`] (i.e., `ldm("image of a astronaut riding a horse")`) for text-to-image generation.
+Here [`DiffusionPipeline`] automatically detects the correct pipeline (*i.e.* [`StableDiffusionPipeline`]), downloads and caches all required configuration and weight files (if not already done so), and finally returns a pipeline instance, called `pipe`.
+The pipeline instance can then be called using [`StableDiffusionPipeline.__call__`] (i.e., `pipe("image of an astronaut riding a horse")`) for text-to-image generation.

Instead of using the generic [`DiffusionPipeline`] class for loading, you can also load the appropriate pipeline class directly. The code snippet above yields the same instance as when doing:

```python
-from diffusers import LDMTextToImagePipeline
+from diffusers import StableDiffusionPipeline

-repo_id = "CompVis/ldm-text2im-large-256"
-ldm = LDMTextToImagePipeline.from_pretrained(repo_id)
+repo_id = "runwayml/stable-diffusion-v1-5"
+pipe = StableDiffusionPipeline.from_pretrained(repo_id)
```

-Diffusion pipelines like `LDMTextToImagePipeline` often consist of multiple components. These components can be both parameterized models, such as `"unet"`, `"vqvae"` and "bert", tokenizers or schedulers. These components can interact in complex ways with each other when using the pipeline in inference, *e.g.* for [`LDMTextToImagePipeline`] or [`StableDiffusionPipeline`] the inference call is explained [here](https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work).
+
+
+Many checkpoints, such as [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5), can be used for multiple tasks, *e.g.* *text-to-image* or *image-to-image*.
+If you want to use those checkpoints for a task that is different from the default one, you have to load the checkpoint directly from the corresponding task-specific pipeline class:
+
+```python
+from diffusers import StableDiffusionImg2ImgPipeline
+
+repo_id = "runwayml/stable-diffusion-v1-5"
+pipe = StableDiffusionImg2ImgPipeline.from_pretrained(repo_id)
+```
+
+
+Diffusion pipelines like `StableDiffusionImg2ImgPipeline` consist of multiple components. These components can be both parameterized models, such as `"unet"`, `"vae"` and `"text_encoder"`, tokenizers or schedulers.
+These components often interact in complex ways with each other when using the pipeline in inference, *e.g.* for [`StableDiffusionPipeline`] the inference call is explained [here](https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work).
The purpose of the [pipeline classes](./api/overview#diffusers-summary) is to wrap the complexity of these diffusion systems and give the user an easy-to-use API while staying flexible for customization, as will be shown later.
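To make the end-to-end flow concrete, here is a minimal usage sketch; the GPU placement, the prompt, and the output filename are illustrative assumptions of this example rather than part of the loading API:

```python
from diffusers import DiffusionPipeline

repo_id = "runwayml/stable-diffusion-v1-5"
pipe = DiffusionPipeline.from_pretrained(repo_id)
pipe = pipe.to("cuda")  # assumes a CUDA GPU is available; omit to run on CPU

# the pipeline call returns an output object whose `.images` field holds a list of PIL images
image = pipe("image of an astronaut riding a horse").images[0]
image.save("astronaut.png")
```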
-### Loading pipelines that require access request
+
### Loading pipelines locally

@@ -101,9 +121,9 @@ If you prefer to have complete control over the pipeline and its corresponding f
we recommend loading pipelines locally.
To load a diffusion pipeline locally, you first need to manually download the whole folder structure on your local disk and then pass a local path to the [`DiffusionPipeline.from_pretrained`]. Let's again look at an example for
-[CompVis' Latent Diffusion model](https://huggingface.co/CompVis/ldm-text2im-large-256).
+[Runway's Stable Diffusion model](https://huggingface.co/runwayml/stable-diffusion-v1-5).

-First, you should make use of [`git-lfs`](https://git-lfs.github.com/) to download the whole folder structure that has been uploaded to the [model repository](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main):
+First, you should make use of [`git-lfs`](https://git-lfs.github.com/) to download the whole folder structure that has been uploaded to the [model repository](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main):

```
git lfs install
@@ -178,105 +198,177 @@ stable_diffusion_img2img = StableDiffusionImg2ImgPipeline(**components)
Note how the above code snippet makes use of [`DiffusionPipeline.components`].

+### Loading variants
+
+Diffusion Pipeline checkpoints can offer variants of the "main" diffusion pipeline checkpoint.
+Such checkpoint variants are usually variations of the checkpoint that have advantages for specific use-cases and that are so similar to the "main" checkpoint that they **should not** be put in a new model repository. A checkpoint variant has to have **exactly** the same checkpoint layout as the "main" checkpoint, including all weights having the same tensor shapes.
+
+#### Let's first talk about what checkpoint variants are **not**.
+
+Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)), as checkpoints in a different serialization format hold weights identical to those of the "main" format and are just loaded with different code.
+
+Also, variants do not correspond to different checkpoint layouts, *e.g.* [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) is not a variant of [stable-diffusion-2-0](https://huggingface.co/stabilityai/stable-diffusion-2) since the checkpoint layout is different (Stable Diffusion 1-5 uses a different `CLIPTextModel` compared to Stable Diffusion 2.0).
+
+Pipeline checkpoints that are identical in checkpoint layout, but have been trained on different datasets or with vastly different training setups, and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)), should probably also not be stored as variants of each other, but rather as individual model repositories.
+
+#### So what are checkpoint variants then?
+
+Checkpoint variants usually consist of the checkpoint stored in a "*low-precision, low-storage*" dtype, so that less bandwidth is required to download it, or of *non-exponential-averaged* weights that should be used when continuing to fine-tune the checkpoint.
+Both use cases have clear advantages in certain settings, have the same serialization format, and correspond to an adaptation of the "main" checkpoint that does not warrant a new model repository. The sketch just below illustrates the file-naming convention these variants rely on.
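The following is a minimal sketch of that naming scheme; the standalone `add_variant` function is illustrative, but its logic mirrors the `_add_variant` helper introduced earlier in this patch series:

```python
from typing import Optional


# mirrors the `_add_variant` helper from this patch series: the variant
# string is inserted right before the file extension
def add_variant(weights_name: str, variant: Optional[str] = None) -> str:
    if variant is not None:
        splits = weights_name.split(".")
        splits = splits[:-1] + [variant] + splits[-1:]
        weights_name = ".".join(splits)
    return weights_name


assert add_variant("diffusion_pytorch_model.bin", "fp16") == "diffusion_pytorch_model.fp16.bin"
assert add_variant("model.safetensors", "non_ema") == "model.non_ema.safetensors"
```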
+A checkpoint stored in [torch's half-precision / float16 format](https://pytorch.org/blog/accelerating-training-on-nvidia-gpus-with-pytorch-automatic-mixed-precision/) requires only half the bandwidth and storage when downloading the checkpoint,
+**but** cannot be used when continuing to train the checkpoint or when running the checkpoint on CPU.
+Similarly, the *non-exponential-averaged* version of the checkpoint should be used when continuing to fine-tune the model checkpoint, **but** should not be used when using the checkpoint for inference.
+
+#### How to save and load variants
+
+Saving a diffusion pipeline as a variant can be done by providing [`DiffusionPipeline.save_pretrained`] with the `variant` argument.
+The `variant` argument extends the weight name with the provided variant string, changing the default weight name from `diffusion_pytorch_model.bin` to `diffusion_pytorch_model.{variant}.bin` or from `diffusion_pytorch_model.safetensors` to `diffusion_pytorch_model.{variant}.safetensors`. By doing so, one creates a variant of the pipeline checkpoint that can be loaded **instead** of the "main" pipeline checkpoint.
+
+Let's have a look at how we could create a float16 variant of a pipeline. First, we load
+the "main" variant of a checkpoint in half precision, using `torch_dtype=torch.float16`.
+
+```py
+from diffusers import DiffusionPipeline
+import torch
+
+pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
+```
+
+Now all model components of the pipeline are stored in half-precision dtype. We can now save the
+pipeline under a `"fp16"` variant as follows:
+
+```py
+pipe.save_pretrained("./stable-diffusion-v1-5", variant="fp16")
+```

### How does loading work?

As a class method, [`DiffusionPipeline.from_pretrained`] is responsible for two things:
- Download the latest version of the folder structure required to run the `repo_id` with `diffusers` and cache them. If the latest folder structure is available in the local cache, [`DiffusionPipeline.from_pretrained`] will simply reuse the cache and **not** re-download the files.
- Load the cached weights into the _correct_ pipeline class – one of the [officially supported pipeline classes](./api/overview#diffusers-summary) - and return an instance of the class. The _correct_ pipeline class is thereby retrieved from the `model_index.json` file.

-The underlying folder structure of diffusion pipelines correspond 1-to-1 to their corresponding class instances, *e.g.* [`LDMTextToImagePipeline`] for [`CompVis/ldm-text2im-large-256`](https://huggingface.co/CompVis/ldm-text2im-large-256)
+The underlying folder structure of diffusion pipelines corresponds 1-to-1 to their corresponding class instances, *e.g.* [`StableDiffusionPipeline`] for [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5)

This can be understood better by looking at an example.
Let's print out pipeline class instance `pipeline` we just defined: ```python from diffusers import DiffusionPipeline -repo_id = "CompVis/ldm-text2im-large-256" -ldm = DiffusionPipeline.from_pretrained(repo_id) -print(ldm) +repo_id = "runwayml/stable-diffusion-v1-5" +pipe = DiffusionPipeline.from_pretrained(repo_id) +print(pipe) ``` *Output*: ``` -LDMTextToImagePipeline { - "bert": [ - "latent_diffusion", - "LDMBertModel" +StableDiffusionPipeline { + "feature_extractor": [ + "transformers", + "CLIPFeatureExtractor" + ], + "safety_checker": [ + "stable_diffusion", + "StableDiffusionSafetyChecker" ], "scheduler": [ "diffusers", - "DDIMScheduler" + "PNDMScheduler" + ], + "text_encoder": [ + "transformers", + "CLIPTextModel" ], "tokenizer": [ "transformers", - "BertTokenizer" + "CLIPTokenizer" ], "unet": [ "diffusers", "UNet2DConditionModel" ], - "vqvae": [ + "vae": [ "diffusers", "AutoencoderKL" ] } ``` -First, we see that the official pipeline is the [`LDMTextToImagePipeline`], and second we see that the `LDMTextToImagePipeline` consists of 5 components: -- `"bert"` of class `LDMBertModel` as defined [in the pipeline](https://github.com/huggingface/diffusers/blob/cd502b25cf0debac6f98d27a6638ef95208d1ea2/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py#L664) -- `"scheduler"` of class [`DDIMScheduler`] -- `"tokenizer"` of class `BertTokenizer` as defined [in `transformers`](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer) -- `"unet"` of class [`UNet2DConditionModel`] -- `"vqvae"` of class [`AutoencoderKL`] +First, we see that the official pipeline is the [`StableDiffusionPipeline`], and second we see that the `StableDiffusionPipeline` consists of 7 components: +- `"feature_extractor"` of class `CLIPFeatureExtractor` as defined [in `transformes`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPFeatureExtractor). +- `"safety_checker"` as defined [here](https://github.com/huggingface/diffusers/blob/e55687e1e15407f60f32242027b7bb8170e58266/src/diffusers/pipelines/stable_diffusion/safety_checker.py#L32). +- `"scheduler"` of class [`PNDMScheduler`]. +- `"text_encoder"` of class `CLIPTextModel` as defined [in `transformes`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTextModel). +- `"tokenizer"` of class `CLIPTokenizer` as defined [in `transformers`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer). +- `"unet"` of class [`UNet2DConditionModel`]. +- `"vae"` of class [`AutoencoderKL`]. -Let's now compare the pipeline instance to the folder structure of the model repository `CompVis/ldm-text2im-large-256`. Looking at the folder structure of [`CompVis/ldm-text2im-large-256`](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main) on the Hub, we can see it matches 1-to-1 the printed out instance of `LDMTextToImagePipeline` above: +Let's now compare the pipeline instance to the folder structure of the model repository `runwayml/stable-diffusion-v1-5`. Looking at the folder structure of [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main) on the Hub and excluding model and saving format variants, we can see it matches 1-to-1 the printed out instance of `StableDiffusionPipeline` above: ``` . 
-├── bert +├── feature_extractor +│   └── preprocessor_config.json +├── model_index.json +├── safety_checker │   ├── config.json │   └── pytorch_model.bin -├── model_index.json ├── scheduler │   └── scheduler_config.json +├── text_encoder +│   ├── config.json +│   └── pytorch_model.bin ├── tokenizer +│   ├── merges.txt │   ├── special_tokens_map.json │   ├── tokenizer_config.json -│   └── vocab.txt +│   └── vocab.json ├── unet │   ├── config.json -│   └── diffusion_pytorch_model.bin -└── vqvae +│   ├── diffusion_pytorch_model.bin +└── vae ├── config.json - └── diffusion_pytorch_model.bin + ├── diffusion_pytorch_model.bin ``` -As we can see each attribute of the instance of `LDMTextToImagePipeline` has its configuration and possibly weights defined in a subfolder that is called **exactly** like the class attribute (`"bert"`, `"scheduler"`, `"tokenizer"`, `"unet"`, `"vqvae"`). Importantly, every pipeline expects a `model_index.json` file that tells the `DiffusionPipeline` both: +Each attribute of the instance of `StableDiffusionPipeline` has its configuration and possibly weights defined in a subfolder that is called **exactly** like the class attribute (`"feature_extractor"`, `"safety_checker"`, `"scheduler"`, `"text_encoder"`, `"tokenizer"`, `"unet"`, `"vae"`). Importantly, every pipeline expects a `model_index.json` file that tells the `DiffusionPipeline` both: - which pipeline class should be loaded, and - what sub-classes from which library are stored in which subfolders -In the case of `CompVis/ldm-text2im-large-256` the `model_index.json` is therefore defined as follows: +In the case of `runwayml/stable-diffusion-v1-5` the `model_index.json` is therefore defined as follows: ``` { - "_class_name": "LDMTextToImagePipeline", - "_diffusers_version": "0.0.4", - "bert": [ - "latent_diffusion", - "LDMBertModel" + "_class_name": "StableDiffusionPipeline", + "_diffusers_version": "0.6.0", + "feature_extractor": [ + "transformers", + "CLIPFeatureExtractor" + ], + "safety_checker": [ + "stable_diffusion", + "StableDiffusionSafetyChecker" ], "scheduler": [ "diffusers", - "DDIMScheduler" + "PNDMScheduler" + ], + "text_encoder": [ + "transformers", + "CLIPTextModel" ], "tokenizer": [ "transformers", - "BertTokenizer" + "CLIPTokenizer" ], "unet": [ "diffusers", "UNet2DConditionModel" ], - "vqvae": [ + "vae": [ "diffusers", "AutoencoderKL" ] @@ -292,9 +384,9 @@ In the case of `CompVis/ldm-text2im-large-256` the `model_index.json` is therefo "class" ] ``` - - The `"name"` field corresponds both to the name of the subfolder in which the configuration and weights are stored as well as the attribute name of the pipeline class (as can be seen [here](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main/bert) and [here](https://github.com/huggingface/diffusers/blob/cd502b25cf0debac6f98d27a6638ef95208d1ea2/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py#L42) + - The `"name"` field corresponds both to the name of the subfolder in which the configuration and weights are stored as well as the attribute name of the pipeline class (as can be seen [here](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/bert) and [here](https://github.com/huggingface/diffusers/blob/cd502b25cf0debac6f98d27a6638ef95208d1ea2/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py#L42) - The `"library"` field corresponds to the name of the library, *e.g.* `diffusers` or `transformers` from which the `"class"` should be loaded - - The `"class"` field corresponds to 
the name of the class, *e.g.* [`BertTokenizer`](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer) or [`UNet2DConditionModel`] + - The `"class"` field corresponds to the name of the class, *e.g.* [`CLIPTokenizer`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer) or [`UNet2DConditionModel`] ## Loading models @@ -310,19 +402,19 @@ Let's look at an example: ```python from diffusers import UNet2DConditionModel -repo_id = "CompVis/ldm-text2im-large-256" +repo_id = "runwayml/stable-diffusion-v1-5" model = UNet2DConditionModel.from_pretrained(repo_id, subfolder="unet") ``` -Note how we have to define the `subfolder="unet"` argument to tell [`ModelMixin.from_pretrained`] that the model weights are located in a [subfolder of the repository](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main/unet). +Note how we have to define the `subfolder="unet"` argument to tell [`ModelMixin.from_pretrained`] that the model weights are located in a [subfolder of the repository](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/unet). As explained in [Loading customized pipelines]("./using-diffusers/loading#loading-customized-pipelines"), one can pass a loaded model to a diffusion pipeline, via [`DiffusionPipeline.from_pretrained`]: ```python from diffusers import DiffusionPipeline -repo_id = "CompVis/ldm-text2im-large-256" -ldm = DiffusionPipeline.from_pretrained(repo_id, unet=model) +repo_id = "runwayml/stable-diffusion-v1-5" +pipe = DiffusionPipeline.from_pretrained(repo_id, unet=model) ``` If the model files can be found directly at the root level, which is usually only the case for some very simple diffusion models, such as [`google/ddpm-cifar10-32`](https://huggingface.co/google/ddpm-cifar10-32), we don't diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 61ce4e3a8c64..984392c5328f 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -147,7 +147,7 @@ def load(module: torch.nn.Module, prefix=""): def _add_variant(weights_name: str, variant: Optional[str] = None) -> str: if variant is not None: splits = os.path.splitext(weights_name) - splits = splits[:-1] + [variant] + splits[-1:] + splits = splits[:-1] + (variant,) + splits[-1:] weights_name = ".".join(splits) return weights_name From bdebb3689ccc4a52830da11cf144e76a33e174bf Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 10:27:51 +0000 Subject: [PATCH 13/27] up --- src/diffusers/models/modeling_utils.py | 4 ++-- src/diffusers/pipelines/pipeline_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 984392c5328f..8cfca195a781 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -146,8 +146,8 @@ def load(module: torch.nn.Module, prefix=""): def _add_variant(weights_name: str, variant: Optional[str] = None) -> str: if variant is not None: - splits = os.path.splitext(weights_name) - splits = splits[:-1] + (variant,) + splits[-1:] + splits = weights_name.split(".") + splits = splits[:-1] + [variant] + splits[-1:] weights_name = ".".join(splits) return weights_name diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 60f48376b386..ff50acf8cde4 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ 
b/src/diffusers/pipelines/pipeline_utils.py @@ -529,7 +529,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P ) comp_model_filenames = variant_compatible_siblings(info, variant=revision) comp_model_filenames = [ - ".".join(os.path.splitext(f)[:1] + os.path.splitext(f)[2:]) for f in comp_model_filenames + ".".join(f.split(".")[:1] + f.split(".")[2:]) for f in comp_model_filenames ] if set(comp_model_filenames) == set(model_filenames): From 73bf79f2bcdfac1b1d1bf8889eebcfc391e941da Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 12:55:41 +0000 Subject: [PATCH 14/27] up --- src/diffusers/pipelines/pipeline_utils.py | 31 +++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index ff50acf8cde4..cb5a46fe53c9 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -17,6 +17,7 @@ import importlib import inspect import os +import re import warnings from dataclasses import dataclass from pathlib import Path @@ -41,6 +42,8 @@ DEPRECATED_REVISION_ARGS, DIFFUSERS_CACHE, HF_HUB_OFFLINE, + SAFETENSORS_WEIGHTS_NAME, + WEIGHTS_NAME, BaseOutput, deprecate, get_class_from_dynamic_module, @@ -56,6 +59,11 @@ if is_transformers_available(): import transformers from transformers import PreTrainedModel + from transformers.utils import FLAX_WEIGHTS_NAME as TRANSFORMERS_FLAX_WEIGHTS_NAME + from transformers.utils import SAFE_WEIGHTS_NAME as TRANSFORMERS_SAFE_WEIGHTS_NAME + from transformers.utils import WEIGHTS_NAME as TRANSFORMERS_WEIGHTS_NAME + +from ..utils import FLAX_WEIGHTS_NAME, ONNX_WEIGHTS_NAME INDEX_FILE = "diffusion_pytorch_model.bin" @@ -140,16 +148,29 @@ def is_safetensors_compatible(filenames, variant=None) -> bool: def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], str]: filenames = set(sibling.rfilename for sibling in info.siblings) - save_formats = ["bin", "safetensors", "msgpack", "onnx"] + weight_names = [WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME, FLAX_WEIGHTS_NAME, ONNX_WEIGHTS_NAME] + + if is_transformers_available(): + weight_names += [TRANSFORMERS_WEIGHTS_NAME, TRANSFORMERS_SAFE_WEIGHTS_NAME, TRANSFORMERS_FLAX_WEIGHTS_NAME] + + # model_pytorch, diffusion_model_pytorch, ... + weight_prefixes = [w.split(".")[0] for w in weight_names] + # .bin, .safetensors, ... 
+ weight_suffixs = [w.split(".")[-1] for w in weight_names]
+
+ variant_file_regex = (
+ re.compile(f"({'|'.join(weight_prefixes)})({variant})({'|'.join(weight_suffixs)})")
+ if variant is not None
+ else None
+ )
+ non_variant_file_regex = re.compile(f"{'|'.join(weight_names)}")

if variant is not None:
- variant_filenames = set(f for f in filenames if any(f.endswith(f"{variant}.{s}") for s in save_formats))
+ variant_filenames = set(f for f in filenames if variant_file_regex.match(f.split("/")[-1]) is not None)
else:
variant_filenames = set()

- non_variant_filenames = set(
- f for f in filenames if (len(f.split(".")) == 2 and any(f.endswith(f".{s}") for s in save_formats))
- )
+ non_variant_filenames = set(f for f in filenames if non_variant_file_regex.match(f.split("/")[-1]) is not None)

usable_filenames = set(variant_filenames)
for f in non_variant_filenames:

From 48226f70f3bc423134b9e3ba79599d8565826dd7 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Tue, 14 Feb 2023 15:23:20 +0200
Subject: [PATCH 15/27] up

---
docs/source/en/using-diffusers/loading.mdx | 182 ++++++++++++++++++
examples/dreambooth/train_dreambooth.py | 2 +-
examples/text_to_image/train_text_to_image.py | 2 +-
.../train_unconditional.py | 2 +-
src/diffusers/models/modeling_utils.py | 2 +-
5 files changed, 186 insertions(+), 4 deletions(-)

diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx
index 52200096f501..184ec02004d5 100644
--- a/docs/source/en/using-diffusers/loading.mdx
+++ b/docs/source/en/using-diffusers/loading.mdx
@@ -242,9 +242,153 @@ pipeline under a `"fp16"` variant as follows:
pipe.save_pretrained("./stable-diffusion-v1-5", variant="fp16")
```

+If we don't save into an existing `stable-diffusion-v1-5` folder, the new folder will look as follows:
+
+```
+stable-diffusion-v1-5
+├── feature_extractor
+│   └── preprocessor_config.json
+├── model_index.json
+├── safety_checker
+│   ├── config.json
+│   └── pytorch_model.fp16.bin
+├── scheduler
+│   └── scheduler_config.json
+├── text_encoder
+│   ├── config.json
+│   └── pytorch_model.fp16.bin
+├── tokenizer
+│   ├── merges.txt
+│   ├── special_tokens_map.json
+│   ├── tokenizer_config.json
+│   └── vocab.json
+├── unet
+│   ├── config.json
+│   └── diffusion_pytorch_model.fp16.bin
+└── vae
+    ├── config.json
+    └── diffusion_pytorch_model.fp16.bin
+```
+
+As one can see, all model files now have a `.fp16.bin` extension instead of just `.bin`.
+The variant now has to be loaded by also passing `variant="fp16"` to [`DiffusionPipeline.from_pretrained`], e.g.:
+
+```py
+DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", variant="fp16", torch_dtype=torch.float16)
+```
+
+works just fine, while:
+
+```py
+DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", torch_dtype=torch.float16)
+```
+
+throws an Exception:
+```
+OSError: Error no file named diffusion_pytorch_model.bin found in directory ./stable-diffusion-v1-5/vae
+```
+
+This is expected since we **only** stored the `"fp16"` variant, i.e. we don't have any "non-variant" checkpoint files saved locally.
+However, the whole idea of pipeline variants is that they can co-exist with the "main" variant,
+so one would typically also save the "main" variant in the same folder. Let's do this:
+
+```py
+pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+pipe.save_pretrained("./stable-diffusion-v1-5")
+```
+
+and upload the pipeline to the Hub under [diffusers/stable-diffusion-variants](https://huggingface.co/diffusers/stable-diffusion-variants).
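One way to perform that upload is sketched below using `huggingface_hub`'s `upload_folder`; this sketch assumes the target repository already exists and that you are logged in with write access:

```python
from huggingface_hub import upload_folder

# pushes the locally saved folder - containing both the "main" and the
# "fp16" variant files - to the Hub repository
upload_folder(
    repo_id="diffusers/stable-diffusion-variants",
    folder_path="./stable-diffusion-v1-5",
)
```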
+The file structure [on the Hub](https://huggingface.co/diffusers/stable-diffusion-variants/tree/main) now looks as follows: + +```py +├── feature_extractor +│   └── preprocessor_config.json +├── model_index.json +├── safety_checker +│   ├── config.json +│   ├── pytorch_model.bin +│   └── pytorch_model.fp16.bin +├── scheduler +│   └── scheduler_config.json +├── text_encoder +│   ├── config.json +│   ├── pytorch_model.bin +│   └── pytorch_model.fp16.bin +├── tokenizer +│   ├── merges.txt +│   ├── special_tokens_map.json +│   ├── tokenizer_config.json +│   └── vocab.json +├── unet +│   ├── config.json +│   ├── diffusion_pytorch_model.bin +│   ├── diffusion_pytorch_model.fp16.bin +└── vae + ├── config.json + ├── diffusion_pytorch_model.bin + └── diffusion_pytorch_model.fp16.bin +``` + +We can now both download the "main" and the "fp16" variant from the Hub. Both: + +```py +pipe = DiffusionPipeline.from_pretrained("diffusers/stable-diffusion-variants") +``` + +and + +```py +pipe = DiffusionPipeline.from_pretrained("diffusers/stable-diffusion-variants", variant="fp16") +``` + +works. + + + +Note that Diffusers never downloads more checkpoints that needed. E.g. when downloading +the "main" variant, none of the "fp16.bin" files are downloaded and cached. +Only when the user specificies `variant="fp16"` are those files downloaded and cached. + + + +Finally, there are cases where only some of the checkpoint files of the pipeline are of a certain +variation. E.g. it's usually only the UNet checkpoint that has both a *exponential-mean-averaged* (EMA) and a *non-exponential-mean-averaged* (non-EMA) version. All other model components, e.g. the text encoder, safety checker or variational auto-encoder usually don't have such a variation. +In such a case, one would upload just the UNet's checkpoint file with a `non_ema` version format (as done [here](https://huggingface.co/diffusers/stable-diffusion-variants/blob/main/unet/diffusion_pytorch_model.non_ema.bin)) and upon calling: + +```python +pipe = DiffusionPipeline.from_pretrained("diffusers/stable-diffusion-variants", variant="non_ema") +``` + +the model will use only the "non_ema" checkpoint variant if it is available - otherwise it'll load the +"main" variation. In the above example, `variant="non_ema"` would therefore download the following file structure: + +``` +├── feature_extractor +│   └── preprocessor_config.json +├── model_index.json +├── safety_checker +│   ├── config.json +│   ├── pytorch_model.bin +├── scheduler +│   └── scheduler_config.json +├── text_encoder +│   ├── config.json +│   ├── pytorch_model.bin +├── tokenizer +│   ├── merges.txt +│   ├── special_tokens_map.json +│   ├── tokenizer_config.json +│   └── vocab.json +├── unet +│   ├── config.json +│   └── diffusion_pytorch_model.non_ema.bin +└── vae + ├── config.json + ├── diffusion_pytorch_model.bin +``` + +In a nutshell, using `variant="{variant}"` will download all files that match the `{variant}` and if for a model component such a file variant is not present it will download the "main" variant. If neither a "main" or `{variant}` variant is available, an error will the thrown. ### How does loading work? 
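Before moving on, here is a minimal, self-contained sketch that makes the fallback rule above concrete (a hypothetical helper for illustration, not the library's actual implementation):

```py
def resolve_weight_file(available_files, weight_name="diffusion_pytorch_model.bin", variant=None):
    # prefer "diffusion_pytorch_model.{variant}.bin", then fall back to the "main" file
    if variant is not None:
        prefix, suffix = weight_name.rsplit(".", 1)
        variant_name = f"{prefix}.{variant}.{suffix}"
        if variant_name in available_files:
            return variant_name
    if weight_name in available_files:
        return weight_name
    raise OSError(f"Found neither a '{variant}' variant nor a 'main' file for {weight_name}.")


# e.g. only the UNet ships a "non_ema" variant; the VAE falls back to the "main" weights
unet_files = {"diffusion_pytorch_model.bin", "diffusion_pytorch_model.non_ema.bin"}
vae_files = {"diffusion_pytorch_model.bin"}
print(resolve_weight_file(unet_files, variant="non_ema"))  # diffusion_pytorch_model.non_ema.bin
print(resolve_weight_file(vae_files, variant="non_ema"))   # diffusion_pytorch_model.bin
```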
@@ -388,6 +532,32 @@ In the case of `runwayml/stable-diffusion-v1-5` the `model_index.json` is theref - The `"library"` field corresponds to the name of the library, *e.g.* `diffusers` or `transformers` from which the `"class"` should be loaded - The `"class"` field corresponds to the name of the class, *e.g.* [`CLIPTokenizer`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer) or [`UNet2DConditionModel`] + ## Loading models @@ -427,6 +597,18 @@ repo_id = "google/ddpm-cifar10-32" model = UNet2DModel.from_pretrained(repo_id) ``` +As motivated in [How to save and load variants?](#how-to-save-and-load-variants), models can load and +save variants. To load a model variant, one should pass the `variant` function argument to [`ModelMixin.from_pretrained`]. Analogeous, to save a model variant, one should psas the `variant` function argument to [`ModelMixin.save_pretrained`]: + +```python +from diffusers import UNet2DConditionModel + +model = UNet2DConditionModel.from_pretrained( + "diffusers/stable-diffusion-variants", subfolder="unet", variant="non_ema" +) +model.save_pretrained("./local-unet", variant="non_ema") +``` + ## Loading schedulers Schedulers rely on [`SchedulerMixin.from_pretrained`]. Schedulers are **not parameterized** or **trained**, but instead purely defined by a configuration file. diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py index 880f4a51f611..9fe075f5f7e1 100644 --- a/examples/dreambooth/train_dreambooth.py +++ b/examples/dreambooth/train_dreambooth.py @@ -609,7 +609,7 @@ def main(args): ) # `accelerate` 0.16.0 will have better support for customized saving - if version.parse(accelerate.__version__) >= version.parse("0.16.0"): + if version.parse(accelerate.__version__) >= version.parse("0.15.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): for model in models: diff --git a/examples/text_to_image/train_text_to_image.py b/examples/text_to_image/train_text_to_image.py index 39089a85680f..cb1bee0e843a 100644 --- a/examples/text_to_image/train_text_to_image.py +++ b/examples/text_to_image/train_text_to_image.py @@ -409,7 +409,7 @@ def main(): raise ValueError("xformers is not available. 
Make sure it is installed correctly") # `accelerate` 0.16.0 will have better support for customized saving - if version.parse(accelerate.__version__) >= version.parse("0.16.0"): + if version.parse(accelerate.__version__) >= version.parse("0.15.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): if args.use_ema: diff --git a/examples/unconditional_image_generation/train_unconditional.py b/examples/unconditional_image_generation/train_unconditional.py index 64ba126d0cce..556586c93f26 100644 --- a/examples/unconditional_image_generation/train_unconditional.py +++ b/examples/unconditional_image_generation/train_unconditional.py @@ -281,7 +281,7 @@ def main(args): ) # `accelerate` 0.16.0 will have better support for customized saving - if version.parse(accelerate.__version__) >= version.parse("0.16.0"): + if version.parse(accelerate.__version__) >= version.parse("0.15.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): if args.use_ema: diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 984392c5328f..4c97777e2a91 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -823,7 +823,7 @@ def _get_model_file( try: if revision in DEPRECATED_REVISION_ARGS and version.parse( version.parse(__version__).base_version - ) >= version.parse("0.16.0"): + ) >= version.parse("0.15.0"): variant = _add_variant(weights_name, revision) try: From 23ace69c8372b4c42cf5d00a9c60dce5d5bc48f3 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 13:29:18 +0000 Subject: [PATCH 16/27] fix tests --- src/diffusers/pipelines/pipeline_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index cb5a46fe53c9..0301e2984b04 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -159,7 +159,7 @@ def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], weight_suffixs = [w.split(".")[-1] for w in weight_names] variant_file_regex = ( - re.compile(f"({'|'.join(weight_prefixes)})({variant})({'|'.join(weight_suffixs)})") + re.compile(f"({'|'.join(weight_prefixes)})(.{variant}.)({'|'.join(weight_suffixs)})") if variant is not None else None ) From 9e09f6219865824c315d75be36069a0c137d1a48 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 14:32:10 +0100 Subject: [PATCH 17/27] Apply suggestions from code review Co-authored-by: Pedro Cuenca --- docs/source/en/using-diffusers/loading.mdx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 52200096f501..c969f410da82 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -59,7 +59,7 @@ pipe = StableDiffusionImg2ImgPipeline.from_pretrained(repo_id) -Diffusion pipelines like `StableDiffusionImg2ImgPipeline` consist of multiple components. These components can be both parameterized models, such as `"unet"`, `"vae"` and "text_encoder", tokenizers or schedulers. +Diffusion pipelines like `StableDiffusionPipeline` or `StableDiffusionImg2ImgPipeline` consist of multiple components. 
These components can be both parameterized models, such as `"unet"`, `"vae"` and `"text_encoder"`, tokenizers or schedulers. These components often interact in complex ways with each other when using the pipeline in inference, *e.g.* for [`StableDiffusionPipeline`] the inference call is explained [here](https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work). The purpose of the [pipeline classes](./api/overview#diffusers-summary) is to wrap the complexity of these diffusion systems and give the user an easy-to-use API while staying flexible for customization, as will be shown later. @@ -205,20 +205,20 @@ Such checkpoint variants are usually variations of the checkpoint that have adva #### Let's first talk about what checkpoint variants are **not**. -Checkpoint variants do **not** include different serialization format (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as different serialization formats are +Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as different serialization formats are identical to the "main" serialization format, just loaded with different code. Also variants do not correspond to different checkpoint layouts, *e.g.* [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) is not a variant of [stable-diffusion-2-0](https://huggingface.co/stabilityai/stable-diffusion-2) since the checkpoint layout is different (Stable Diffusion 1-5 uses a different `CLIPTextModel` compared to Stable Diffusion 2.0). -Pipeline checkpoints that are identical in checkpoint layout, but have been trained on different datasets, trained with vastly diffenent training setups and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)) should probably also not be stored as variations of each other, but rather as individual model repositories. +Pipeline checkpoints that are identical in checkpoint layout, but have been trained on different datasets, trained with vastly different training setups and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)) should probably also not be stored as variations of each other, but rather as individual model repositories. #### So what are checkpoint variants then? -Checkpoint variants usually consists of the checkpoint stored in "*low-precision, low-storage*" dtype so that less bandwith is required to download them or of *non-exponential-averaged* weights that shall be used when continuing fine-tuning the checkpoint. -Both such use case have clear advantages in certain settings, have the same serialization format, and correspond to an adaption of the "main" checkpoint which does not warrant a new model repository. +Checkpoint variants usually consist of the checkpoint stored in "*low-precision, low-storage*" dtype so that less bandwith is required to download them, or of *non-exponential-averaged* weights that shall be used when continuing fine-tuning from the checkpoint. 
+Both use cases have clear advantages when their weights are considered variants: they share the same serialization format as the reference weights, and they correspond to an specialization of the "main" checkpoint which does not warrant a new model repository. A checkpoint stored in [torch's half-precision / float16 format](https://pytorch.org/blog/accelerating-training-on-nvidia-gpus-with-pytorch-automatic-mixed-precision/) requires only half the bandwith and storage when downloading the checkpoint, -**but** cannot be used when continuing training the checkpoint or when running the checkpoint on CPU. -Similarly the *non-exponential-averaged* version of the checkpoint should be used when continuing fine-tuning of the model checkpoint, **but** should not be used when using the checkpoint for inference. +**but** cannot be used when continuing training or when running the checkpoint on CPU. +Similarly the *non-exponential-averaged* (or non-EMA) version of the checkpoint should be used when continuing fine-tuning of the model checkpoint, **but** should not be used when using the checkpoint for inference. #### How to save and load variants @@ -226,7 +226,7 @@ Saving a diffusion pipeline as a variant can be done by providing [`DiffusionPip The `variant` extends the weight name by the provided variation, by changing the default weight name from `diffusion_pytorch_model.bin` to `diffusion_pytorch_model.{variant}.bin` or from `diffusion_pytorch_model.safetensors` to `diffusion_pytorch_model.{variant}.safetensors`. By doing so, one creates a variant of the pipeline checkpoint that can be loaded **instead** of the "main" pipeline checkpoint. Let's have a look at how we could create a float16 variant of a pipeline. First, we load -the "main" variant of a checkpoint into mixed precision format, using `dtype=torch.float16`. +the "main" variant of a checkpoint (stored in `float32` precision) into mixed precision format, using `dtype=torch.float16`. ```py from diffusers import DiffusionPipeline From 70cf0402723584217b7ad884b49b02dce9e99124 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 14:33:46 +0100 Subject: [PATCH 18/27] Apply suggestions from code review Co-authored-by: Pedro Cuenca --- docs/source/en/using-diffusers/loading.mdx | 6 +++--- examples/dreambooth/train_dreambooth.py | 2 +- examples/text_to_image/train_text_to_image.py | 2 +- .../unconditional_image_generation/train_unconditional.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index c94f5512fc7a..0da76a5c7d00 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -397,7 +397,7 @@ As a class method, [`DiffusionPipeline.from_pretrained`] is responsible for two - Load the cached weights into the _correct_ pipeline class – one of the [officially supported pipeline classes](./api/overview#diffusers-summary) - and return an instance of the class. The _correct_ pipeline class is thereby retrieved from the `model_index.json` file. The underlying folder structure of diffusion pipelines correspond 1-to-1 to their corresponding class instances, *e.g.* [`StableDiffusionPipeline`] for [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) -This can be understood better by looking at an example. Let's print out pipeline class instance `pipeline` we just defined: +This can be better understood by looking at an example. 
Let's load a pipeline class instance `pipe` and print it: ```python from diffusers import DiffusionPipeline @@ -442,10 +442,10 @@ StableDiffusionPipeline { ``` First, we see that the official pipeline is the [`StableDiffusionPipeline`], and second we see that the `StableDiffusionPipeline` consists of 7 components: -- `"feature_extractor"` of class `CLIPFeatureExtractor` as defined [in `transformes`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPFeatureExtractor). +- `"feature_extractor"` of class `CLIPFeatureExtractor` as defined [in `transformers`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPFeatureExtractor). - `"safety_checker"` as defined [here](https://github.com/huggingface/diffusers/blob/e55687e1e15407f60f32242027b7bb8170e58266/src/diffusers/pipelines/stable_diffusion/safety_checker.py#L32). - `"scheduler"` of class [`PNDMScheduler`]. -- `"text_encoder"` of class `CLIPTextModel` as defined [in `transformes`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTextModel). +- `"text_encoder"` of class `CLIPTextModel` as defined [in `transformers`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTextModel). - `"tokenizer"` of class `CLIPTokenizer` as defined [in `transformers`](https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer). - `"unet"` of class [`UNet2DConditionModel`]. - `"vae"` of class [`AutoencoderKL`]. diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py index 9fe075f5f7e1..880f4a51f611 100644 --- a/examples/dreambooth/train_dreambooth.py +++ b/examples/dreambooth/train_dreambooth.py @@ -609,7 +609,7 @@ def main(args): ) # `accelerate` 0.16.0 will have better support for customized saving - if version.parse(accelerate.__version__) >= version.parse("0.15.0"): + if version.parse(accelerate.__version__) >= version.parse("0.16.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): for model in models: diff --git a/examples/text_to_image/train_text_to_image.py b/examples/text_to_image/train_text_to_image.py index cb1bee0e843a..39089a85680f 100644 --- a/examples/text_to_image/train_text_to_image.py +++ b/examples/text_to_image/train_text_to_image.py @@ -409,7 +409,7 @@ def main(): raise ValueError("xformers is not available. 
Make sure it is installed correctly") # `accelerate` 0.16.0 will have better support for customized saving - if version.parse(accelerate.__version__) >= version.parse("0.15.0"): + if version.parse(accelerate.__version__) >= version.parse("0.16.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): if args.use_ema: diff --git a/examples/unconditional_image_generation/train_unconditional.py b/examples/unconditional_image_generation/train_unconditional.py index 556586c93f26..64ba126d0cce 100644 --- a/examples/unconditional_image_generation/train_unconditional.py +++ b/examples/unconditional_image_generation/train_unconditional.py @@ -281,7 +281,7 @@ def main(args): ) # `accelerate` 0.16.0 will have better support for customized saving - if version.parse(accelerate.__version__) >= version.parse("0.15.0"): + if version.parse(accelerate.__version__) >= version.parse("0.16.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): if args.use_ema: From 501446dd315521dd29ed9ead201d2fe80adb9100 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 15:38:44 +0200 Subject: [PATCH 19/27] correct code format --- docs/source/en/using-diffusers/loading.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index c94f5512fc7a..18f03ae9170e 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -301,7 +301,7 @@ pipe.save_pretrained("./stable-diffusion-v1-5") and upload the pipeline to the Hub under [diffusers/stable-diffusion-variants](https://huggingface.co/diffusers/stable-diffusion-variants). The file structure [on the Hub](https://huggingface.co/diffusers/stable-diffusion-variants/tree/main) now looks as follows: -```py +``` ├── feature_extractor │   └── preprocessor_config.json ├── model_index.json From 61f7ff249a808bc5c460ca017991593c3a785de0 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 15:58:23 +0200 Subject: [PATCH 20/27] fix warning --- src/diffusers/pipelines/pipeline_utils.py | 25 +++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 0301e2984b04..1f3be3616462 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -178,12 +178,7 @@ def variant_compatible_siblings(info, variant=None) -> Union[List[os.PathLike], if variant_filename not in usable_filenames: usable_filenames.add(f) - if len(variant_filenames) > 0 and usable_filenames != variant_filenames: - logger.warn( - f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(usable_filenames - variant_filenames)} from repository files: {', '.join(filenames)}]\nIf this behavior is not expected, please check your folder structure." 
- ) - - return usable_filenames + return usable_filenames, variant_filenames class DiffusionPipeline(ConfigMixin): @@ -537,7 +532,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P use_auth_token=use_auth_token, revision=revision, ) - model_filenames = variant_compatible_siblings(info, variant=variant) + model_filenames, variant_filenames = variant_compatible_siblings(info, variant=variant) model_folder_names = set([os.path.split(f)[0] for f in model_filenames]) if revision in DEPRECATED_REVISION_ARGS and version.parse( @@ -548,7 +543,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P use_auth_token=use_auth_token, revision=None, ) - comp_model_filenames = variant_compatible_siblings(info, variant=revision) + comp_model_filenames, _ = variant_compatible_siblings(info, variant=revision) comp_model_filenames = [ ".".join(f.split(".")[:1] + f.split(".")[2:]) for f in comp_model_filenames ] @@ -584,9 +579,23 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P ignore_patterns = ["*.bin", "*.safetensors", ".onnx"] elif is_safetensors_available() and is_safetensors_compatible(model_filenames, variant=variant): ignore_patterns = ["*.bin", "*.msgpack"] + + bin_variant_filenames = [f for f in variant_filenames if f.endswith(".bin")] + bin_model_filenames = [f for f in model_filenames if f.endswith(".bin")] + if len(bin_variant_filenames) > 0 and bin_model_filenames != bin_variant_filenames: + logger.warn( + f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(bin_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(bin_model_filenames - bin_variant_filenames)}\nIf this behavior is not expected, please check your folder structure." + ) + else: ignore_patterns = ["*.safetensors", "*.msgpack"] + onnx_variant_filenames = [f for f in variant_filenames if f.endswith(".onnx")] + onnx_model_filenames = [f for f in model_filenames if f.endswith(".onnx")] + if len(onnx_variant_filenames) > 0 and onnx_model_filenames != onnx_variant_filenames: + logger.warn( + f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(onnx_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(onnx_model_filenames - onnx_variant_filenames)}\nIf this behavior is not expected, please check your folder structure." 
+ ) else: # allow everything since it has to be downloaded anyways ignore_patterns = allow_patterns = None From 81dc1073816caa1ee974827f501bc83a20200001 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Feb 2023 14:30:21 +0000 Subject: [PATCH 21/27] finish --- src/diffusers/pipelines/pipeline_utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 1f3be3616462..7fe1fbddcf56 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -580,22 +580,23 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P elif is_safetensors_available() and is_safetensors_compatible(model_filenames, variant=variant): ignore_patterns = ["*.bin", "*.msgpack"] - bin_variant_filenames = [f for f in variant_filenames if f.endswith(".bin")] - bin_model_filenames = [f for f in model_filenames if f.endswith(".bin")] - if len(bin_variant_filenames) > 0 and bin_model_filenames != bin_variant_filenames: + onnx_variant_filenames = set([f for f in variant_filenames if f.endswith(".onnx")]) + onnx_model_filenames = set([f for f in model_filenames if f.endswith(".onnx")]) + if len(onnx_variant_filenames) > 0 and onnx_model_filenames != onnx_variant_filenames: logger.warn( - f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(bin_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(bin_model_filenames - bin_variant_filenames)}\nIf this behavior is not expected, please check your folder structure." + f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(onnx_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(onnx_model_filenames - onnx_variant_filenames)}\nIf this behavior is not expected, please check your folder structure." ) else: ignore_patterns = ["*.safetensors", "*.msgpack"] - onnx_variant_filenames = [f for f in variant_filenames if f.endswith(".onnx")] - onnx_model_filenames = [f for f in model_filenames if f.endswith(".onnx")] - if len(onnx_variant_filenames) > 0 and onnx_model_filenames != onnx_variant_filenames: + bin_variant_filenames = set([f for f in variant_filenames if f.endswith(".bin")]) + bin_model_filenames = set([f for f in model_filenames if f.endswith(".bin")]) + if len(bin_variant_filenames) > 0 and bin_model_filenames != bin_variant_filenames: logger.warn( - f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(onnx_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(onnx_model_filenames - onnx_variant_filenames)}\nIf this behavior is not expected, please check your folder structure." + f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(bin_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(bin_model_filenames - bin_variant_filenames)}\nIf this behavior is not expected, please check your folder structure." 
) + else: # allow everything since it has to be downloaded anyways ignore_patterns = allow_patterns = None From f26abeb3999a9e8268c270028846bcbcc2b5a613 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 16 Feb 2023 09:54:30 +0100 Subject: [PATCH 22/27] Apply suggestions from code review Co-authored-by: Suraj Patil --- docs/source/en/using-diffusers/loading.mdx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 7dd8f88048eb..08b7d2ddf576 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -201,9 +201,10 @@ Note how the above code snippet makes use of [`DiffusionPipeline.components`]. ### Loading variants Diffusion Pipeline checkpoints can offer variants of the "main" diffusion pipeline checkpoint. -Such checkpoint variants are usually variations of the checkpoint that have advantages for specific use-cases and that are so similar to the "main" checkpoint that they **should not** be put in a new checkpoint. A variation of a checkpoint has to have **exactly** the same checkpoint layout including all weights having the same tensor shapes. +Such checkpoint variants are usually variations of the checkpoint that have advantages for specific use-cases and that are so similar to the "main" checkpoint that they **should not** be put in a new checkpoint. +A variation of a checkpoint has to have **exactly** the same serialization format and **exactly** the same checkpoint layout, including all weights having the same tensor shapes. -#### Let's first talk about what checkpoint variants are **not**. +#### Let's first talk about whats **not** checkpoint variant, Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as different serialization formats are identical to the "main" serialization format, just loaded with different code. From 57fbe8f97c7e66c792271653a26d6d97c3b24fbc Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 16 Feb 2023 10:01:00 +0100 Subject: [PATCH 23/27] Apply suggestions from code review Co-authored-by: Suraj Patil --- docs/source/en/using-diffusers/loading.mdx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 08b7d2ddf576..58b893b400ee 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -233,7 +233,7 @@ the "main" variant of a checkpoint (stored in `float32` precision) into mixed pr from diffusers import DiffusionPipeline import torch -pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", dtype=torch.float16) +pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16) ``` Now all model components of the pipeline are stored in half-precision dtype. We can now save the @@ -276,7 +276,7 @@ The variant now has to be loaded by also passing a `variant="fp16"` to [`Diffusi ```py -DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", variant="fp16", dtype=torch.float16) +DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", variant="fp16", torch_dtype=torch.float16) ``` works just fine, while: @@ -347,9 +347,9 @@ works. -Note that Diffusers never downloads more checkpoints that needed. E.g. 
when downloading +Note that Diffusers never downloads more checkpoints than needed. E.g. when downloading the "main" variant, none of the "fp16.bin" files are downloaded and cached. -Only when the user specificies `variant="fp16"` are those files downloaded and cached. +Only when the user specifies `variant="fp16"` are those files downloaded and cached. @@ -599,7 +599,7 @@ model = UNet2DModel.from_pretrained(repo_id) ``` As motivated in [How to save and load variants?](#how-to-save-and-load-variants), models can load and -save variants. To load a model variant, one should pass the `variant` function argument to [`ModelMixin.from_pretrained`]. Analogeous, to save a model variant, one should psas the `variant` function argument to [`ModelMixin.save_pretrained`]: +save variants. To load a model variant, one should pass the `variant` function argument to [`ModelMixin.from_pretrained`]. Analogous, to save a model variant, one should pass the `variant` function argument to [`ModelMixin.save_pretrained`]: ```python from diffusers import UNet2DConditionModel From cbffa779be19510e4c24ff59d6c0e4d91f2db04c Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 16 Feb 2023 10:01:30 +0100 Subject: [PATCH 24/27] Update docs/source/en/using-diffusers/loading.mdx Co-authored-by: Suraj Patil --- docs/source/en/using-diffusers/loading.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 58b893b400ee..37d24960a09d 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -206,8 +206,8 @@ A variation of a checkpoint has to have **exactly** the same serialization forma #### Let's first talk about whats **not** checkpoint variant, -Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as different serialization formats are -identical to the "main" serialization format, just loaded with different code. +Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as weights in different serialization formats are +identical to the weights of the "main" checkpoint, just loaded with a different code. Also variants do not correspond to different checkpoint layouts, *e.g.* [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) is not a variant of [stable-diffusion-2-0](https://huggingface.co/stabilityai/stable-diffusion-2) since the checkpoint layout is different (Stable Diffusion 1-5 uses a different `CLIPTextModel` compared to Stable Diffusion 2.0). 
From fb2207880855d092d88a4679aeef9e9c2fe38a7b Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 16 Feb 2023 10:02:58 +0100 Subject: [PATCH 25/27] Apply suggestions from code review Co-authored-by: Will Berman Co-authored-by: Suraj Patil --- docs/source/en/using-diffusers/loading.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 37d24960a09d..06290bf20bd8 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -209,14 +209,14 @@ A variation of a checkpoint has to have **exactly** the same serialization forma Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as weights in different serialization formats are identical to the weights of the "main" checkpoint, just loaded with a different code. -Also variants do not correspond to different checkpoint layouts, *e.g.* [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) is not a variant of [stable-diffusion-2-0](https://huggingface.co/stabilityai/stable-diffusion-2) since the checkpoint layout is different (Stable Diffusion 1-5 uses a different `CLIPTextModel` compared to Stable Diffusion 2.0). +Also variants do not correspond to different model structures, *e.g.* [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) is not a variant of [stable-diffusion-2-0](https://huggingface.co/stabilityai/stable-diffusion-2) since the model structure is different (Stable Diffusion 1-5 uses a different `CLIPTextModel` compared to Stable Diffusion 2.0). -Pipeline checkpoints that are identical in checkpoint layout, but have been trained on different datasets, trained with vastly different training setups and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)) should probably also not be stored as variations of each other, but rather as individual model repositories. +Pipeline checkpoints that are identical in checkpoint layout, but have been trained on different datasets, trained with vastly different training setups and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)) should probably be stored in individual repositories instead of as variations of eachother. #### So what are checkpoint variants then? Checkpoint variants usually consist of the checkpoint stored in "*low-precision, low-storage*" dtype so that less bandwith is required to download them, or of *non-exponential-averaged* weights that shall be used when continuing fine-tuning from the checkpoint. -Both use cases have clear advantages when their weights are considered variants: they share the same serialization format as the reference weights, and they correspond to an specialization of the "main" checkpoint which does not warrant a new model repository. +Both use cases have clear advantages when their weights are considered variants: they share the same serialization format as the reference weights, and they correspond to a specialization of the "main" checkpoint which does not warrant a new model repository. 
A checkpoint stored in [torch's half-precision / float16 format](https://pytorch.org/blog/accelerating-training-on-nvidia-gpus-with-pytorch-automatic-mixed-precision/) requires only half the bandwith and storage when downloading the checkpoint, **but** cannot be used when continuing training or when running the checkpoint on CPU. Similarly the *non-exponential-averaged* (or non-EMA) version of the checkpoint should be used when continuing fine-tuning of the model checkpoint, **but** should not be used when using the checkpoint for inference. @@ -227,7 +227,7 @@ Saving a diffusion pipeline as a variant can be done by providing [`DiffusionPip The `variant` extends the weight name by the provided variation, by changing the default weight name from `diffusion_pytorch_model.bin` to `diffusion_pytorch_model.{variant}.bin` or from `diffusion_pytorch_model.safetensors` to `diffusion_pytorch_model.{variant}.safetensors`. By doing so, one creates a variant of the pipeline checkpoint that can be loaded **instead** of the "main" pipeline checkpoint. Let's have a look at how we could create a float16 variant of a pipeline. First, we load -the "main" variant of a checkpoint (stored in `float32` precision) into mixed precision format, using `dtype=torch.float16`. +the "main" variant of a checkpoint (stored in `float32` precision) into mixed precision format, using `torch_dtype=torch.float16`. ```py from diffusers import DiffusionPipeline From dbdd126095fd95a55a9202d43b9cadfc5c5f6b22 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 16 Feb 2023 11:06:25 +0200 Subject: [PATCH 26/27] correct loading docs --- docs/source/en/using-diffusers/loading.mdx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/source/en/using-diffusers/loading.mdx b/docs/source/en/using-diffusers/loading.mdx index 06290bf20bd8..1d28b8771044 100644 --- a/docs/source/en/using-diffusers/loading.mdx +++ b/docs/source/en/using-diffusers/loading.mdx @@ -202,16 +202,18 @@ Note how the above code snippet makes use of [`DiffusionPipeline.components`]. Diffusion Pipeline checkpoints can offer variants of the "main" diffusion pipeline checkpoint. Such checkpoint variants are usually variations of the checkpoint that have advantages for specific use-cases and that are so similar to the "main" checkpoint that they **should not** be put in a new checkpoint. -A variation of a checkpoint has to have **exactly** the same serialization format and **exactly** the same checkpoint layout, including all weights having the same tensor shapes. +A variation of a checkpoint has to have **exactly** the same serialization format and **exactly** the same model structure, including all weights having the same tensor shapes. + +Examples of variations are different floating point types and non-ema weights. I.e. "fp16", "bf16", and "no_ema" are common variations. #### Let's first talk about whats **not** checkpoint variant, Checkpoint variants do **not** include different serialization formats (such as [safetensors](https://huggingface.co/docs/diffusers/main/en/using-diffusers/using_safetensors)) as weights in different serialization formats are -identical to the weights of the "main" checkpoint, just loaded with a different code. +identical to the weights of the "main" checkpoint, just loaded in a different framework. 
Also variants do not correspond to different model structures, *e.g.* [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) is not a variant of [stable-diffusion-2-0](https://huggingface.co/stabilityai/stable-diffusion-2) since the model structure is different (Stable Diffusion 1-5 uses a different `CLIPTextModel` compared to Stable Diffusion 2.0). -Pipeline checkpoints that are identical in checkpoint layout, but have been trained on different datasets, trained with vastly different training setups and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)) should probably be stored in individual repositories instead of as variations of eachother. +Pipeline checkpoints that are identical in model structure, but have been trained on different datasets, trained with vastly different training setups and thus correspond to different official releases (such as [Stable Diffusion v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) and [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)) should probably be stored in individual repositories instead of as variations of eachother. #### So what are checkpoint variants then? @@ -282,7 +284,7 @@ DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", variant="fp16", tor works just fine, while: ```py -DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", dtype=torch.float16) +DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", torch_dtype=torch.float16) ``` throws an Exception: From 5bcd411b20a42e6c322cdc6666f504ce75fe799b Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 16 Feb 2023 11:37:32 +0200 Subject: [PATCH 27/27] finish --- src/diffusers/models/modeling_utils.py | 66 +++++++++++------------ src/diffusers/pipelines/pipeline_utils.py | 13 +++-- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index c5c106b6e9c7..913cff66c413 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -820,41 +820,16 @@ def _get_model_file( f"Error no file named {weights_name} found in directory {pretrained_model_name_or_path}." ) else: - try: - if revision in DEPRECATED_REVISION_ARGS and version.parse( - version.parse(__version__).base_version - ) >= version.parse("0.15.0"): - variant = _add_variant(weights_name, revision) - - try: - model_file = hf_hub_download( - pretrained_model_name_or_path, - filename=weights_name, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - resume_download=resume_download, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - user_agent=user_agent, - subfolder=subfolder, - revision=revision, - ) - warnings.warn( - f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'` even though you can load it via `variant=`{variant}`. Loading model variants via `revision='{variant}'` is deprecated and will be removed in diffusers v1. Please use `variant='{variant}'` instead. For more information, please have a look at: ", - FutureWarning, - ) - except: # noqa: E722 - warnings.warn( - f"You are loading the variant {variant} from {pretrained_model_name_or_path} via `revision='{variant}'`. This behavior is deprecated and will be removed in diffusers v1. 
One should use `variant='{variant}'` instead. However, it appears that {pretrained_model_name_or_path} currently does not have a {_add_variant(weights_name)} file in the 'main' branch of {pretrained_model_name_or_path}. \n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title '{pretrained_model_name_or_path} is missing {_add_variant(weights_name)}' so that the correct variant file can be added.", - FutureWarning, - ) - model_file = None - else: - # Load from URL or cache if already cached + # 1. First check if deprecated way of loading from branches is used + if ( + revision in DEPRECATED_REVISION_ARGS + and (weights_name == WEIGHTS_NAME or weights_name == SAFETENSORS_WEIGHTS_NAME) + and version.parse(version.parse(__version__).base_version) >= version.parse("0.15.0") + ): + try: model_file = hf_hub_download( pretrained_model_name_or_path, - filename=weights_name, + filename=_add_variant(weights_name, revision), cache_dir=cache_dir, force_download=force_download, proxies=proxies, @@ -865,6 +840,31 @@ def _get_model_file( subfolder=subfolder, revision=revision, ) + warnings.warn( + f"Loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'` is deprecated. Loading instead from `revision='main'` with `variant={revision}`. Loading model variants via `revision='{revision}'` will be removed in diffusers v1. Please use `variant='{revision}'` instead.", + FutureWarning, + ) + return model_file + except: # noqa: E722 + warnings.warn( + f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'`. This behavior is deprecated and will be removed in diffusers v1. One should use `variant='{revision}'` instead. However, it appears that {pretrained_model_name_or_path} currently does not have a {_add_variant(weights_name)} file in the 'main' branch of {pretrained_model_name_or_path}. \n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title '{pretrained_model_name_or_path} is missing {_add_variant(weights_name)}' so that the correct variant file can be added.", + FutureWarning, + ) + try: + # 2. Load model file as usual + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=weights_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision, + ) return model_file except RepositoryNotFoundError: diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 7fe1fbddcf56..cc208d34a3f9 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -550,7 +550,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P if set(comp_model_filenames) == set(model_filenames): warnings.warn( - f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'` even though you can load it via `variant=`{revision}`. Loading model variants via `revision='{variant}'` is deprecated and will be removed in diffusers v1. Please use `variant='{revision}'` instead. 
For more information, please have a look at: ",
+                        f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'` even though you can load it via `variant='{revision}'`. Loading model variants via `revision='{revision}'` is deprecated and will be removed in diffusers v1. Please use `variant='{revision}'` instead.",
                         FutureWarning,
                     )
                 else:
@@ -580,11 +580,14 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             elif is_safetensors_available() and is_safetensors_compatible(model_filenames, variant=variant):
                 ignore_patterns = ["*.bin", "*.msgpack"]
 
-                onnx_variant_filenames = set([f for f in variant_filenames if f.endswith(".onnx")])
-                onnx_model_filenames = set([f for f in model_filenames if f.endswith(".onnx")])
-                if len(onnx_variant_filenames) > 0 and onnx_model_filenames != onnx_variant_filenames:
+                safetensors_variant_filenames = set([f for f in variant_filenames if f.endswith(".safetensors")])
+                safetensors_model_filenames = set([f for f in model_filenames if f.endswith(".safetensors")])
+                if (
+                    len(safetensors_variant_filenames) > 0
+                    and safetensors_model_filenames != safetensors_variant_filenames
+                ):
                     logger.warn(
-                        f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(onnx_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(onnx_model_filenames - onnx_variant_filenames)}\nIf this behavior is not expected, please check your folder structure."
+                        f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(safetensors_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(safetensors_model_filenames - safetensors_variant_filenames)}]\nIf this behavior is not expected, please check your folder structure."
                     )
 
                 else:
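
The mixture warning above fires when a repository offers the requested variant for only some components. For reference, the sibling listing that `variant_compatible_siblings` consumes can be inspected directly (a sketch using the `model_info` helper already imported in this module, against the example repo from the docs above):

```py
from huggingface_hub import model_info

info = model_info("diffusers/stable-diffusion-variants")
filenames = {sibling.rfilename for sibling in info.siblings}

# components that ship an fp16 variant
print(sorted(f for f in filenames if ".fp16." in f))
# components that only ship the "main" weights
print(sorted(f for f in filenames if f.endswith(".bin") and ".fp16." not in f))
```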