
Commit 8e70c0a

Merge branch 'dev-tensorrt-txt2img-pipeline' of github.com:asfiyab-nvidia/diffusers into dev-tensorrt-txt2img-pipeline
Signed-off-by: Asfiya Baig <[email protected]>
2 parents ea03376 + d995745 commit 8e70c0a

File tree

10 files changed: +595 −479 lines changed


docs/source/en/using-diffusers/loading.mdx

Lines changed: 196 additions & 412 deletions
Large diffs are not rendered by default.

examples/controlnet/train_controlnet_flax.py

Lines changed: 40 additions & 9 deletions
@@ -27,13 +27,13 @@
 import torch
 import torch.utils.checkpoint
 import transformers
-from datasets import load_dataset
+from datasets import load_dataset, load_from_disk
 from flax import jax_utils
 from flax.core.frozen_dict import unfreeze
 from flax.training import train_state
 from flax.training.common_utils import shard
 from huggingface_hub import create_repo, upload_folder
-from PIL import Image
+from PIL import Image, PngImagePlugin
 from torch.utils.data import IterableDataset
 from torchvision import transforms
 from tqdm.auto import tqdm
@@ -49,6 +49,11 @@
 from diffusers.utils import check_min_version, is_wandb_available
 
 
+# To prevent an error that occurs when there are abnormally large compressed data chunk in the png image
+# see more https://github.com/python-pillow/Pillow/issues/5610
+LARGE_ENOUGH_NUMBER = 100
+PngImagePlugin.MAX_TEXT_CHUNK = LARGE_ENOUGH_NUMBER * (1024**2)
+
 if is_wandb_available():
     import wandb
 
@@ -246,6 +251,12 @@ def parse_args():
         default=None,
         help="Total number of training steps to perform.",
     )
+    parser.add_argument(
+        "--checkpointing_steps",
+        type=int,
+        default=5000,
+        help=("Save a checkpoint of the training state every X updates."),
+    )
     parser.add_argument(
         "--learning_rate",
         type=float,
@@ -344,9 +355,17 @@ def parse_args():
         type=str,
         default=None,
         help=(
-            "A folder containing the training data. Folder contents must follow the structure described in"
-            " https://huggingface.co/docs/datasets/image_dataset#imagefolder. In particular, a `metadata.jsonl` file"
-            " must exist to provide the captions for the images. Ignored if `dataset_name` is specified."
+            "A folder containing the training dataset. By default it will use `load_dataset` method to load a custom dataset from the folder."
+            "Folder must contain a dataset script as described here https://huggingface.co/docs/datasets/dataset_script) ."
+            "If `--load_from_disk` flag is passed, it will use `load_from_disk` method instead. Ignored if `dataset_name` is specified."
+        ),
+    )
+    parser.add_argument(
+        "--load_from_disk",
+        action="store_true",
+        help=(
+            "If True, will load a dataset that was previously saved using `save_to_disk` from `--train_data_dir`"
+            "See more https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.load_from_disk"
         ),
     )
     parser.add_argument(
@@ -478,10 +497,15 @@ def make_train_dataset(args, tokenizer, batch_size=None):
         )
     else:
         if args.train_data_dir is not None:
-            dataset = load_dataset(
-                args.train_data_dir,
-                cache_dir=args.cache_dir,
-            )
+            if args.load_from_disk:
+                dataset = load_from_disk(
+                    args.train_data_dir,
+                )
+            else:
+                dataset = load_dataset(
+                    args.train_data_dir,
+                    cache_dir=args.cache_dir,
+                )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script
 
@@ -545,6 +569,7 @@ def tokenize_captions(examples, is_train=True):
     image_transforms = transforms.Compose(
         [
             transforms.Resize(args.resolution, interpolation=transforms.InterpolationMode.BILINEAR),
+            transforms.CenterCrop(args.resolution),
             transforms.ToTensor(),
             transforms.Normalize([0.5], [0.5]),
         ]
@@ -553,6 +578,7 @@ def tokenize_captions(examples, is_train=True):
     conditioning_image_transforms = transforms.Compose(
         [
             transforms.Resize(args.resolution, interpolation=transforms.InterpolationMode.BILINEAR),
+            transforms.CenterCrop(args.resolution),
             transforms.ToTensor(),
         ]
     )
@@ -981,6 +1007,11 @@ def cumul_grad_step(grad_idx, loss_grad_rng):
                         "train/loss": jax_utils.unreplicate(train_metric)["loss"],
                     }
                 )
+            if global_step % args.checkpointing_steps == 0 and jax.process_index() == 0:
+                controlnet.save_pretrained(
+                    f"{args.output_dir}/{global_step}",
+                    params=get_params_to_save(state.params),
+                )
 
             train_metric = jax_utils.unreplicate(train_metric)
             train_step_progress_bar.close()
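The two new flags work together: `--checkpointing_steps` saves the ControlNet weights every N updates to `{output_dir}/{global_step}`, and `--load_from_disk` switches `make_train_dataset` from `load_dataset` to `load_from_disk`. A minimal sketch of preparing a dataset for the new flag, assuming data previously written out with `datasets`' `save_to_disk`; the dataset name and paths below are illustrative, not part of the commit:

# prepare_dataset.py - illustrative only
from datasets import load_dataset, load_from_disk

# Download/process once, then persist the Arrow files locally.
dataset = load_dataset("fusing/fill50k", split="train")
dataset.save_to_disk("./fill50k_on_disk")

# This mirrors the branch the training script takes when --load_from_disk is passed:
# the saved Arrow files are reloaded without re-running a dataset script.
dataset = load_from_disk("./fill50k_on_disk")
print(dataset)

Pointing `--train_data_dir` at the saved directory and passing `--load_from_disk` (plus a `--checkpointing_steps` value) would then reuse this copy during training.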

examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py

Lines changed: 2 additions & 2 deletions
@@ -405,14 +405,14 @@ def main():
     args = parse_args()
     logging_dir = Path(args.output_dir, args.logging_dir)
 
-    accelerator_project_config = ProjectConfiguration(total_limit=args.checkpoints_total_limit)
+    project_config = ProjectConfiguration(total_limit=args.checkpoints_total_limit)
 
     accelerator = Accelerator(
         gradient_accumulation_steps=args.gradient_accumulation_steps,
         mixed_precision=args.mixed_precision,
         log_with="tensorboard",
         logging_dir=logging_dir,
-        accelerator_project_config=accelerator_project_config,
+        project_config=project_config,
     )
 
     # Currently, it's not possible to do gradient accumulation when training two models with accelerate.accumulate
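For context, `accelerate`'s `Accelerator` takes the `ProjectConfiguration` through its `project_config` keyword, which is why the old `accelerator_project_config=` call fails. A minimal sketch of the corrected wiring, with placeholder values instead of the script's parsed arguments:

from accelerate import Accelerator
from accelerate.utils import ProjectConfiguration

# Keep at most three checkpoints on disk; the other arguments are illustrative.
project_config = ProjectConfiguration(total_limit=3)
accelerator = Accelerator(
    gradient_accumulation_steps=1,
    mixed_precision="fp16",
    log_with="tensorboard",
    project_config=project_config,
)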

src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py

Lines changed: 7 additions & 6 deletions
@@ -734,14 +734,15 @@ def __call__(
             image = latents
             has_nsfw_concept = None
 
-        image = self.decode_latents(latents)
-
-        if self.safety_checker is not None:
-            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
         else:
-            has_nsfw_concept = False
+            image = self.decode_latents(latents)
+
+            if self.safety_checker is not None:
+                image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+            else:
+                has_nsfw_concept = False
 
-        image = self.image_processor.postprocess(image, output_type=output_type)
+            image = self.image_processor.postprocess(image, output_type=output_type)
 
         # Offload last model to CPU
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
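The re-indentation moves VAE decoding, the safety checker, and postprocessing under the `else` branch, so they are skipped when the caller asks for raw latents. A hedged sketch of the behavior this enables, assuming the guarding condition checks `output_type == "latent"` as in the sibling Stable Diffusion pipelines; the model id and inputs are illustrative:

import torch
from PIL import Image
from diffusers import AltDiffusionImg2ImgPipeline

pipe = AltDiffusionImg2ImgPipeline.from_pretrained("BAAI/AltDiffusion", torch_dtype=torch.float16).to("cuda")
init_image = Image.new("RGB", (512, 512), color="gray")

# With output_type="latent", the pipeline returns latents directly instead of
# decoding them (and running the safety checker) as the old code path did.
out = pipe(prompt="a fantasy landscape", image=init_image, output_type="latent")
latents = out.images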

src/diffusers/pipelines/pipeline_utils.py

Lines changed: 96 additions & 35 deletions
@@ -134,7 +134,7 @@ class AudioPipelineOutput(BaseOutput):
     audios: np.ndarray
 
 
-def is_safetensors_compatible(filenames, variant=None) -> bool:
+def is_safetensors_compatible(filenames, variant=None, passed_components=None) -> bool:
     """
     Checking for safetensors compatibility:
     - By default, all models are saved with the default pytorch serialization, so we use the list of default pytorch
@@ -150,9 +150,14 @@ def is_safetensors_compatible(filenames, variant=None) -> bool:
 
     sf_filenames = set()
 
+    passed_components = passed_components or []
+
     for filename in filenames:
         _, extension = os.path.splitext(filename)
 
+        if len(filename.split("/")) == 2 and filename.split("/")[0] in passed_components:
+            continue
+
         if extension == ".bin":
             pt_filenames.append(filename)
         elif extension == ".safetensors":
@@ -163,10 +168,8 @@ def is_safetensors_compatible(filenames, variant=None) -> bool:
         path, filename = os.path.split(filename)
         filename, extension = os.path.splitext(filename)
 
-        if filename == "pytorch_model":
-            filename = "model"
-        elif filename == f"pytorch_model.{variant}":
-            filename = f"model.{variant}"
+        if filename.startswith("pytorch_model"):
+            filename = filename.replace("pytorch_model", "model")
         else:
             filename = filename
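Taken together, the helper now ignores weights belonging to components the caller supplies in memory, and the `startswith` rewrite lets sharded `pytorch_model-00001-of-00002`-style names map to their `model*` safetensors counterparts. A hedged sketch of the intended effect; the helper is internal (the import path may differ across versions) and the filenames are made up:

from diffusers.pipelines.pipeline_utils import is_safetensors_compatible

filenames = [
    "unet/diffusion_pytorch_model.safetensors",
    "vae/diffusion_pytorch_model.safetensors",
    "text_encoder/pytorch_model.bin",  # no safetensors copy for the text encoder
]

# The .bin-only text encoder blocks a safetensors-only download...
print(is_safetensors_compatible(filenames))  # False
# ...unless that component is passed to the pipeline and never downloaded.
print(is_safetensors_compatible(filenames, passed_components=["text_encoder"]))  # True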

@@ -196,24 +199,51 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLi
     weight_prefixes = [w.split(".")[0] for w in weight_names]
     # .bin, .safetensors, ...
     weight_suffixs = [w.split(".")[-1] for w in weight_names]
+    # -00001-of-00002
+    transformers_index_format = "\d{5}-of-\d{5}"
+
+    if variant is not None:
+        # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetenstors`
+        variant_file_re = re.compile(
+            f"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$"
+        )
+        # `text_encoder/pytorch_model.bin.index.fp16.json`
+        variant_index_re = re.compile(
+            f"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.{variant}\.json$"
+        )
 
-    variant_file_regex = (
-        re.compile(f"({'|'.join(weight_prefixes)})(.{variant}.)({'|'.join(weight_suffixs)})")
-        if variant is not None
-        else None
+    # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetenstors`
+    non_variant_file_re = re.compile(
+        f"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$"
     )
-    non_variant_file_regex = re.compile(f"{'|'.join(weight_names)}")
+    # `text_encoder/pytorch_model.bin.index.json`
+    non_variant_index_re = re.compile(f"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.json")
 
     if variant is not None:
-        variant_filenames = {f for f in filenames if variant_file_regex.match(f.split("/")[-1]) is not None}
+        variant_weights = {f for f in filenames if variant_file_re.match(f.split("/")[-1]) is not None}
+        variant_indexes = {f for f in filenames if variant_index_re.match(f.split("/")[-1]) is not None}
+        variant_filenames = variant_weights | variant_indexes
     else:
         variant_filenames = set()
 
-    non_variant_filenames = {f for f in filenames if non_variant_file_regex.match(f.split("/")[-1]) is not None}
+    non_variant_weights = {f for f in filenames if non_variant_file_re.match(f.split("/")[-1]) is not None}
+    non_variant_indexes = {f for f in filenames if non_variant_index_re.match(f.split("/")[-1]) is not None}
+    non_variant_filenames = non_variant_weights | non_variant_indexes
 
+    # all variant filenames will be used by default
     usable_filenames = set(variant_filenames)
+
+    def convert_to_variant(filename):
+        if "index" in filename:
+            variant_filename = filename.replace("index", f"index.{variant}")
+        elif re.compile(f"^(.*?){transformers_index_format}").match(filename) is not None:
+            variant_filename = f"{filename.split('-')[0]}.{variant}-{'-'.join(filename.split('-')[1:])}"
+        else:
+            variant_filename = f"{filename.split('.')[0]}.{variant}.{filename.split('.')[1]}"
+        return variant_filename
+
     for f in non_variant_filenames:
-        variant_filename = f"{f.split('.')[0]}.{variant}.{f.split('.')[1]}"
+        variant_filename = convert_to_variant(f)
         if variant_filename not in usable_filenames:
             usable_filenames.add(f)
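The rewritten patterns also recognize transformers-style sharded checkpoints and their index files. A small, hedged illustration of what the variant pattern is meant to accept, with the prefix/suffix lists and filenames invented for the example rather than taken from a real pipeline:

import re

# Invented stand-ins for the values derived from the pipeline's weight names.
transformers_index_format = r"\d{5}-of-\d{5}"
weight_prefixes = ["pytorch_model", "diffusion_pytorch_model", "model"]
weight_suffixs = ["bin", "safetensors"]
variant = "fp16"

variant_file_re = re.compile(
    rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$"
)

for name in [
    "diffusion_pytorch_model.fp16.bin",               # single-file variant weight -> matches
    "pytorch_model.fp16-00001-of-00002.safetensors",  # sharded variant weight -> matches
    "pytorch_model.bin",                              # non-variant weight -> no match
]:
    print(f"{name}: {bool(variant_file_re.match(name))}")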

@@ -292,6 +322,27 @@ def get_class_obj_and_candidates(library_name, class_name, importable_classes, p
     return class_obj, class_candidates
 
 
+def _get_pipeline_class(class_obj, config, custom_pipeline=None, cache_dir=None, revision=None):
+    if custom_pipeline is not None:
+        if custom_pipeline.endswith(".py"):
+            path = Path(custom_pipeline)
+            # decompose into folder & file
+            file_name = path.name
+            custom_pipeline = path.parent.absolute()
+        else:
+            file_name = CUSTOM_PIPELINE_FILE_NAME
+
+        return get_class_from_dynamic_module(
+            custom_pipeline, module_file=file_name, cache_dir=cache_dir, revision=revision
+        )
+
+    if class_obj != DiffusionPipeline:
+        return class_obj
+
+    diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0])
+    return getattr(diffusers_module, config["_class_name"])
+
+
 def load_sub_model(
     library_name: str,
     class_name: str,
@@ -779,7 +830,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         device_map = kwargs.pop("device_map", None)
         low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         variant = kwargs.pop("variant", None)
-        kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
+        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
 
         # 1. Download the checkpoints and configs
         # use snapshot download here to get it working from from_pretrained
@@ -794,8 +845,11 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 use_auth_token=use_auth_token,
                 revision=revision,
                 from_flax=from_flax,
+                use_safetensors=use_safetensors,
                 custom_pipeline=custom_pipeline,
+                custom_revision=custom_revision,
                 variant=variant,
+                **kwargs,
             )
         else:
             cached_folder = pretrained_model_name_or_path
@@ -810,29 +864,17 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             for folder in os.listdir(cached_folder):
                 folder_path = os.path.join(cached_folder, folder)
                 is_folder = os.path.isdir(folder_path) and folder in config_dict
-                variant_exists = is_folder and any(path.split(".")[1] == variant for path in os.listdir(folder_path))
+                variant_exists = is_folder and any(
+                    p.split(".")[1].startswith(variant) for p in os.listdir(folder_path)
+                )
                 if variant_exists:
                     model_variants[folder] = variant
 
         # 3. Load the pipeline class, if using custom module then load it from the hub
        # if we load from explicit class, let's use it
-        if custom_pipeline is not None:
-            if custom_pipeline.endswith(".py"):
-                path = Path(custom_pipeline)
-                # decompose into folder & file
-                file_name = path.name
-                custom_pipeline = path.parent.absolute()
-            else:
-                file_name = CUSTOM_PIPELINE_FILE_NAME
-
-            pipeline_class = get_class_from_dynamic_module(
-                custom_pipeline, module_file=file_name, cache_dir=cache_dir, revision=custom_revision
-            )
-        elif cls != DiffusionPipeline:
-            pipeline_class = cls
-        else:
-            diffusers_module = importlib.import_module(cls.__module__.split(".")[0])
-            pipeline_class = getattr(diffusers_module, config_dict["_class_name"])
+        pipeline_class = _get_pipeline_class(
+            cls, config_dict, custom_pipeline=custom_pipeline, cache_dir=cache_dir, revision=custom_revision
+        )
 
         # DEPRECATED: To be removed in 1.0.0
         if pipeline_class.__name__ == "StableDiffusionInpaintPipeline" and version.parse(
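The class-resolution logic is now shared by `from_pretrained` and `download` through `_get_pipeline_class`, with `custom_revision` threaded through both paths. From the caller's side this stays the existing public interface; a hedged sketch, with the repository and community pipeline names chosen for illustration:

from diffusers import DiffusionPipeline

# Load a community pipeline from the Hub, pinning the revision of the custom pipeline code.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="lpw_stable_diffusion",
    custom_revision="main",
)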
@@ -1095,6 +1137,7 @@ def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]:
         revision = kwargs.pop("revision", None)
         from_flax = kwargs.pop("from_flax", False)
         custom_pipeline = kwargs.pop("custom_pipeline", None)
+        custom_revision = kwargs.pop("custom_revision", None)
         variant = kwargs.pop("variant", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
 
@@ -1153,7 +1196,7 @@ def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]:
         # this enables downloading schedulers, tokenizers, ...
         allow_patterns += [os.path.join(k, "*") for k in folder_names if k not in model_folder_names]
         # also allow downloading config.json files with the model
-        allow_patterns += [os.path.join(k, "*.json") for k in model_folder_names]
+        allow_patterns += [os.path.join(k, "config.json") for k in model_folder_names]
 
         allow_patterns += [
             SCHEDULER_CONFIG_NAME,
@@ -1162,17 +1205,28 @@ def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]:
             CUSTOM_PIPELINE_FILE_NAME,
         ]
 
+        # retrieve passed components that should not be downloaded
+        pipeline_class = _get_pipeline_class(
+            cls, config_dict, custom_pipeline=custom_pipeline, cache_dir=cache_dir, revision=custom_revision
+        )
+        expected_components, _ = cls._get_signature_keys(pipeline_class)
+        passed_components = [k for k in expected_components if k in kwargs]
+
         if (
             use_safetensors
             and not allow_pickle
-            and not is_safetensors_compatible(model_filenames, variant=variant)
+            and not is_safetensors_compatible(
+                model_filenames, variant=variant, passed_components=passed_components
+            )
         ):
             raise EnvironmentError(
                 f"Could not found the necessary `safetensors` weights in {model_filenames} (variant={variant})"
             )
         if from_flax:
             ignore_patterns = ["*.bin", "*.safetensors", "*.onnx", "*.pb"]
-        elif use_safetensors and is_safetensors_compatible(model_filenames, variant=variant):
+        elif use_safetensors and is_safetensors_compatible(
+            model_filenames, variant=variant, passed_components=passed_components
+        ):
            ignore_patterns = ["*.bin", "*.msgpack"]
 
             safetensors_variant_filenames = {f for f in variant_filenames if f.endswith(".safetensors")}
@@ -1194,6 +1248,13 @@ def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]:
                     f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n[{', '.join(bin_variant_filenames)}]\nLoaded non-{variant} filenames:\n[{', '.join(bin_model_filenames - bin_variant_filenames)}\nIf this behavior is not expected, please check your folder structure."
                 )
 
+        # Don't download any objects that are passed
+        allow_patterns = [
+            p for p in allow_patterns if not (len(p.split("/")) == 2 and p.split("/")[0] in passed_components)
+        ]
+        # Don't download index files of forbidden patterns either
+        ignore_patterns = ignore_patterns + [f"{i}.index.*json" for i in ignore_patterns]
+
         re_ignore_pattern = [re.compile(fnmatch.translate(p)) for p in ignore_patterns]
         re_allow_pattern = [re.compile(fnmatch.translate(p)) for p in allow_patterns]
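The download path now resolves the pipeline class early so it can tell which components the caller already supplies; files for those components are dropped from `allow_patterns` and never fetched. A hedged sketch of the call pattern this optimizes, with illustrative model ids:

from diffusers import DiffusionPipeline
from transformers import CLIPTextModel

# The text encoder is supplied in memory, so under this change its weight files
# should no longer be downloaded along with the rest of the pipeline.
text_encoder = CLIPTextModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="text_encoder")
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    text_encoder=text_encoder,
    use_safetensors=True,
)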
