diff --git a/scripts/convert_original_stable_diffusion_to_diffusers.py b/scripts/convert_original_stable_diffusion_to_diffusers.py
index c232efe56758..d449f283d95e 100644
--- a/scripts/convert_original_stable_diffusion_to_diffusers.py
+++ b/scripts/convert_original_stable_diffusion_to_diffusers.py
@@ -48,7 +48,10 @@
         "--pipeline_type",
         default=None,
         type=str,
-        help="The pipeline type. If `None` pipeline will be automatically inferred.",
+        help=(
+            "The pipeline type. One of 'FrozenOpenCLIPEmbedder', 'FrozenCLIPEmbedder', 'PaintByExample'"
+            ". If `None` pipeline will be automatically inferred."
+        ),
     )
     parser.add_argument(
         "--image_size",
@@ -65,7 +68,7 @@
         type=str,
         help=(
             "The prediction type that the model was trained on. Use 'epsilon' for Stable Diffusion v1.X and Stable"
-            " Siffusion v2 Base. Use 'v-prediction' for Stable Diffusion v2."
+            " Diffusion v2 Base. Use 'v_prediction' for Stable Diffusion v2."
         ),
     )
     parser.add_argument(
@@ -79,8 +82,7 @@
     )
     parser.add_argument(
         "--upcast_attention",
-        default=False,
-        type=bool,
+        action="store_true",
         help=(
             "Whether the attention computation should always be upcasted. This is necessary when running stable"
             " diffusion 2.1."
         ),
     )
@@ -111,5 +113,6 @@
         num_in_channels=args.num_in_channels,
         upcast_attention=args.upcast_attention,
         from_safetensors=args.from_safetensors,
+        device=args.device,
     )
     pipe.save_pretrained(args.dump_path, safe_serialization=args.to_safetensors)
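Note on the `--upcast_attention` hunk above: with `type=bool`, argparse feeds the raw
command-line string to `bool()`, and every non-empty string (including "False") is
truthy, so the option could never be explicitly disabled. A minimal standalone sketch
(not part of the patch) of the old pitfall and the new behavior:

    import argparse

    # Old behavior: bool("False") is True, so the option silently stays enabled.
    broken = argparse.ArgumentParser()
    broken.add_argument("--upcast_attention", default=False, type=bool)
    print(broken.parse_args(["--upcast_attention", "False"]).upcast_attention)  # True

    # New behavior: a plain presence/absence flag.
    fixed = argparse.ArgumentParser()
    fixed.add_argument("--upcast_attention", action="store_true")
    print(fixed.parse_args([]).upcast_attention)                      # False
    print(fixed.parse_args(["--upcast_attention"]).upcast_attention)  # True

The new `device=args.device` forwarding in the last hunk assumes the script defines a
`--device` argument elsewhere; that hunk is not shown in this diff.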
diff --git a/src/diffusers/pipelines/audio_diffusion/mel.py b/src/diffusers/pipelines/audio_diffusion/mel.py
index fbc756d496a9..ccb296098aca 100644
--- a/src/diffusers/pipelines/audio_diffusion/mel.py
+++ b/src/diffusers/pipelines/audio_diffusion/mel.py
@@ -13,17 +13,12 @@
 #  limitations under the License.


-import warnings
+import numpy as np  # noqa: E402

 from ...configuration_utils import ConfigMixin, register_to_config
 from ...schedulers.scheduling_utils import SchedulerMixin


-warnings.filterwarnings("ignore")
-
-import numpy as np  # noqa: E402
-
-
 try:
     import librosa  # noqa: E402
diff --git a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
index 338c1d3584d9..033c0a23a98e 100644
--- a/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
+++ b/src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
@@ -39,10 +39,13 @@
 from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline
 from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker

-from ...utils import is_omegaconf_available, is_safetensors_available
+from ...utils import is_omegaconf_available, is_safetensors_available, logging
 from ...utils.import_utils import BACKENDS_MAPPING


+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
 def shave_segments(path, n_shave_prefix_segments=1):
     """
     Removes segments. Positive values shave the first segments, negative shave the last segments.
@@ -801,11 +804,11 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         corresponding to the original architecture. If `None`, will be automatically inferred by looking for a key
         that only exists in SD2.0 models.
     :param image_size: The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable
-        Siffusion v2
+        Diffusion v2
         Base. Use 768 for Stable Diffusion v2.
     :param prediction_type: The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion v1.X and Stable
-        Siffusion v2 Base. Use `'v-prediction'` for Stable Diffusion v2.
+        Diffusion v2 Base. Use 'v_prediction' for Stable Diffusion v2.
     :param num_in_channels: The number of input channels. If `None` number of input channels will be automatically inferred.
     :param scheduler_type: Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler", "euler-ancestral", "dpm", "ddim"]`.
     :param model_type: The pipeline type. `None` to automatically infer, or one of
@@ -820,6 +823,8 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         `checkpoint_path` is in `safetensors` format, load checkpoint with safetensors instead of PyTorch.
     :return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file.
     """
+    if prediction_type == "v-prediction":
+        prediction_type = "v_prediction"

     if not is_omegaconf_available():
         raise ValueError(BACKENDS_MAPPING["omegaconf"][1])
@@ -957,6 +962,7 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     # Convert the text model.
     if model_type is None:
         model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
+        logger.debug(f"no `model_type` given, `model_type` inferred as: {model_type}")

     if model_type == "FrozenOpenCLIPEmbedder":
         text_model = convert_open_clip_checkpoint(checkpoint)
diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py
index 9542325137e9..4eeb67f6b182 100644
--- a/src/diffusers/schedulers/scheduling_ddim.py
+++ b/src/diffusers/schedulers/scheduling_ddim.py
@@ -305,7 +305,6 @@ def step(
         prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction

         if eta > 0:
-            # randn_like does not support generator https://github.com/pytorch/pytorch/issues/27072
             device = model_output.device
             if variance_noise is not None and generator is not None:
                 raise ValueError(
diff --git a/src/diffusers/schedulers/scheduling_unclip.py b/src/diffusers/schedulers/scheduling_unclip.py
index f5b28468284a..da074ec61fa4 100644
--- a/src/diffusers/schedulers/scheduling_unclip.py
+++ b/src/diffusers/schedulers/scheduling_unclip.py
@@ -106,8 +106,11 @@ def __init__(
         clip_sample: bool = True,
         clip_sample_range: Optional[float] = 1.0,
         prediction_type: str = "epsilon",
+        beta_schedule: str = "squaredcos_cap_v2",
     ):
-        # beta scheduler is "squaredcos_cap_v2"
+        if beta_schedule != "squaredcos_cap_v2":
+            raise ValueError("UnCLIPScheduler only supports `beta_schedule`: 'squaredcos_cap_v2'")
+
         self.betas = betas_for_alpha_bar(num_train_timesteps)
         self.alphas = 1.0 - self.betas
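The docstring fixes and the new alias handling in
`load_pipeline_from_original_stable_diffusion_ckpt` mean the long-standing
'v-prediction' spelling keeps working and is rewritten to 'v_prediction' before use.
A rough usage sketch (the checkpoint, config, and output paths are placeholders):

    from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
        load_pipeline_from_original_stable_diffusion_ckpt,
    )

    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
        checkpoint_path="v2-1_768-ema-pruned.ckpt",  # placeholder path
        original_config_file="v2-inference-v.yaml",  # placeholder path
        image_size=768,
        prediction_type="v-prediction",  # normalized to "v_prediction" internally
    )
    pipe.save_pretrained("sd-2-1-diffusers")  # placeholder output directory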
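Likewise, `UnCLIPScheduler` now declares `beta_schedule` as an explicit constructor
argument (presumably so configs that carry that key still load) while rejecting any
value other than the single schedule it implements:

    from diffusers import UnCLIPScheduler

    scheduler = UnCLIPScheduler()  # defaults to beta_schedule="squaredcos_cap_v2"
    UnCLIPScheduler(beta_schedule="linear")  # raises ValueError under the new guard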
diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index 76a470c90038..a36e5ccf27cd 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -17,23 +17,36 @@
 from packaging import version

 from .import_utils import is_flax_available, is_onnx_available, is_torch_available
+from .logging import get_logger


 global_rng = random.Random()
+logger = get_logger(__name__)


 if is_torch_available():
     import torch

-    torch_device = "cuda" if torch.cuda.is_available() else "cpu"
-    is_torch_higher_equal_than_1_12 = version.parse(version.parse(torch.__version__).base_version) >= version.parse(
-        "1.12"
-    )
+    if "DIFFUSERS_TEST_DEVICE" in os.environ:
+        torch_device = os.environ["DIFFUSERS_TEST_DEVICE"]

-    if is_torch_higher_equal_than_1_12:
-        # Some builds of torch 1.12 don't have the mps backend registered. See #892 for more details
-        mps_backend_registered = hasattr(torch.backends, "mps")
-        torch_device = "mps" if (mps_backend_registered and torch.backends.mps.is_available()) else torch_device
+        available_backends = ["cuda", "cpu", "mps"]
+        if torch_device not in available_backends:
+            raise ValueError(
+                f"unknown torch backend for diffusers tests: {torch_device}. Available backends are:"
+                f" {available_backends}"
+            )
+        logger.info(f"torch_device overrode to {torch_device}")
+    else:
+        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+        is_torch_higher_equal_than_1_12 = version.parse(
+            version.parse(torch.__version__).base_version
+        ) >= version.parse("1.12")
+
+        if is_torch_higher_equal_than_1_12:
+            # Some builds of torch 1.12 don't have the mps backend registered. See #892 for more details
+            mps_backend_registered = hasattr(torch.backends, "mps")
+            torch_device = "mps" if (mps_backend_registered and torch.backends.mps.is_available()) else torch_device


 def torch_all_close(a, b, *args, **kwargs):
diff --git a/tests/pipelines/unclip/test_unclip.py b/tests/pipelines/unclip/test_unclip.py
index c065ee7ea0f4..daa083aa1e1a 100644
--- a/tests/pipelines/unclip/test_unclip.py
+++ b/tests/pipelines/unclip/test_unclip.py
@@ -30,6 +30,7 @@

 class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     pipeline_class = UnCLIPPipeline
+    test_xformers_attention = False

     required_optional_params = [
         "generator",
diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index 08f13b89607c..a1d3122f875c 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -259,6 +259,7 @@ def _test_inference_batch_single_identical(
             # Taking the median of the largest differences
             # is resilient to outliers
             diff = np.abs(output_batch[0][0] - output[0][0])
+            diff = diff.flatten()
             diff.sort()
             max_diff = np.median(diff[-5:])
         else:
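With the testing_utils change, the device used by the test suite can be forced through
the new `DIFFUSERS_TEST_DEVICE` environment variable; anything outside cuda/cpu/mps
raises at import time. A sketch, assuming torch is installed and the variable is set
before the module is first imported:

    import os

    # Must be set before diffusers.utils.testing_utils is imported.
    os.environ["DIFFUSERS_TEST_DEVICE"] = "cpu"

    from diffusers.utils.testing_utils import torch_device

    print(torch_device)  # "cpu", even on a machine with CUDA available
    # A value such as "tpu" would instead raise:
    # ValueError: unknown torch backend for diffusers tests: tpu. ...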
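The `diff.flatten()` line added to `_test_inference_batch_single_identical` matters
because in-place `ndarray.sort()` on a 2-D array sorts each row independently, so
`diff[-5:]` previously selected the last five rows rather than the five largest
element-wise differences. A toy illustration (random data stands in for the real
pipeline outputs):

    import numpy as np

    rng = np.random.default_rng(0)
    diff = rng.random((8, 4))  # stand-in for np.abs(output_batch[0][0] - output[0][0])

    a = diff.copy()
    a.sort()                   # per-row sort on a 2-D array
    wrong = np.median(a[-5:])  # last five *rows* (20 values), not the top five values

    b = diff.flatten()
    b.sort()                   # 1-D: a true global sort
    right = np.median(b[-5:])  # median of the five largest differences

    print(wrong, right)        # generally differ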