From 50615d339b4b97136900b711fcae6638a292eeec Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Wed, 8 Mar 2023 02:58:20 +0000 Subject: [PATCH 01/45] add image_processor --- src/diffusers/__init__.py | 2 +- src/diffusers/image_processor.py | 160 ++++++++++++++++++ .../pipeline_stable_diffusion_img2img.py | 36 ++-- tests/test_image_processor.py | 63 +++++++ 4 files changed, 234 insertions(+), 27 deletions(-) create mode 100644 src/diffusers/image_processor.py create mode 100644 tests/test_image_processor.py diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index f480b4100907..bde1b37c858f 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -16,7 +16,7 @@ is_unidecode_available, logging, ) - +from .image_processor import VaeImageProcessor try: if not is_onnx_available(): diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py new file mode 100644 index 000000000000..3887f1175737 --- /dev/null +++ b/src/diffusers/image_processor.py @@ -0,0 +1,160 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union, Optional + +import PIL +from PIL import Image +import torch +import numpy as np + +from .utils import PIL_INTERPOLATION, CONFIG_NAME +from .configuration_utils import ConfigMixin, register_to_config + +class VaeImageProcessor(ConfigMixin): + """ + Image Processor for VAE + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + + Args: + do_resize (`bool`, *optional*, defaults to `True`): + Whether to resize the image's (height, width) dimensions to the specified `size`. + `do_resize` in the `preprocess` method. + vae_scale_factor (`int`, *optional*, defaults to `8`): + scale factor in VAE, if do_resize is True, the image will be automatically resized to multipls of vae_scale_factor + resample (`str`, *optional*, defaults to `lanczos`): + Resampling filter to use if resizing the image. + do_normalize (`bool`, *optional*, defaults to `True`): + Whether to normalize the image to [-1,1] + """ + + config_name = CONFIG_NAME + + @register_to_config + def __init__( + self, + do_resize: bool = True, + vae_scale_factor: int = 8, + resample: str = "lanczos", + do_normalize: bool = True, + ): + super().__init__() + + @staticmethod + def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] 
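        # (note: a 3D array of shape (height, width, channels) is promoted above to a
        # batch of one, so the uint8 conversion below handles single images and
        # batches uniformly)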
+ images = (images * 255).round().astype("uint8") + if images.shape[-1] == 1: + # special case for grayscale (single channel) images + pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] + else: + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + @staticmethod + def numpy_to_pt(images): + """ + Convert a numpy image to a pytorch tensor + """ + images = torch.from_numpy(images.transpose(0, 3, 1, 2)) + return images + + @staticmethod + def pt_to_numpy(images): + """ + Convert a numpy image to a pytorch tensor + """ + images = images.cpu().numpy().transpose(0, 2, 3, 1) + return images + + @staticmethod + def normalize(images): + """ + Normalize an image array to [-1,1] + """ + return 2.0 * images - 1.0 + + def resize(self, images: PIL.Image.Image) -> PIL.Image.Image: + """ + Resize an PIL image. Both height and width will be resized to integer multiple of vae_scale_factor + """ + w, h = images.size + w, h = map(lambda x: x - x % self.vae_scale_factor, (w, h)) # resize to integer multiple of vae_scale_factor + images = images.resize((w, h), resample=PIL_INTERPOLATION[self.resample]) + return images + + def encode( + self, + image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray], + ) -> torch.Tensor: + + """ + Preprocess the image input, accpet formats in PIL images, numpy arrays or pytorch tensors" + """ + # convert PIL or list of PIL into numpy + if isinstance(image, PIL.Image.Image): + image = [image] + + if isinstance(image[0], PIL.Image.Image): + if self.do_resize: + image = [self.resize(i) for i in image] + image = [np.array(i).astype(np.float32) / 255.0 for i in image] + + if isinstance(image, np.ndarray): + image = self.numpy_to_pt(image) + elif isinstance(image[0], np.ndarray): + image = self.numpy_to_pt(np.stack(image, axis=0)) + elif not isinstance(image, torch.Tensor) and isinstance(image[0], torch.Tensor): + image = torch.cat(image, dim=0) + + # expected range [0,1], normalize to [-1,1] + if image.min() < 0: + warnings.warn( + "Passing `image` as torch tensor with value range in [-1,1] is deprecated. The expected value range for image tensor is [0,1] " + f"when passing as pytorch tensor or numpy Array. You passed `image` with value range [{image.min()},{image.max()}]", + FutureWarning, + ) + self.do_normalize = False + + if self.do_normalize: + image = self.normalize(image) + + return image + + def decode( + self, + image, + output_type: str ='pil', + ): + + if output_type == 'pt': + return image + + image = self.pt_to_numpy(image) + + if output_type == 'np': + return image + elif output_type == 'pil': + return self.numpy_to_pil(image) + else: + raise ValueError(f"Unsupported output_type {output_type}.") + + \ No newline at end of file diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 172ab15a757e..526478cb46ad 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -36,6 +36,7 @@ from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker +from ...image_processor import VaeImageProcessor logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -69,27 +70,6 @@ """ -def preprocess(image): - if isinstance(image, torch.Tensor): - return image - elif isinstance(image, PIL.Image.Image): - image = [image] - - if isinstance(image[0], PIL.Image.Image): - w, h = image[0].size - w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 - - image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image] - image = np.concatenate(image, axis=0) - image = np.array(image).astype(np.float32) / 255.0 - image = image.transpose(0, 3, 1, 2) - image = 2.0 * image - 1.0 - image = torch.from_numpy(image) - elif isinstance(image[0], torch.Tensor): - image = torch.cat(image, dim=0) - return image - - class StableDiffusionImg2ImgPipeline(DiffusionPipeline): r""" Pipeline for text-guided image to image generation using Stable Diffusion. @@ -195,8 +175,7 @@ def __init__( deprecate("sample_size<64", "1.0.0", deprecation_message, standard_warn=False) new_config = dict(unet.config) new_config["sample_size"] = 64 - unet._internal_dict = FrozenDict(new_config) - + unet._internal_dict = FrozenDict(new_config) self.register_modules( vae=vae, text_encoder=text_encoder, @@ -207,7 +186,12 @@ def __init__( feature_extractor=feature_extractor, ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - self.register_to_config(requires_safety_checker=requires_safety_checker) + + vae_feature_extractor = VaeImageProcessor( + vae_scale_factor =self.vae_scale_factor) + self.register_to_config( + requires_safety_checker=requires_safety_checker, + vae_feature_extractor = vae_feature_extractor) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload def enable_sequential_cpu_offload(self, gpu_id=0): @@ -674,7 +658,7 @@ def __call__( ) # 4. Preprocess image - image = preprocess(image) + image = self.vae_feature_extractor.encode(image) # 5. set timesteps self.scheduler.set_timesteps(num_inference_steps, device=device) @@ -713,7 +697,7 @@ def __call__( progress_bar.update() if callback is not None and i % callback_steps == 0: callback(i, t, latents) - + # 9. Post-processing image = self.decode_latents(latents) diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py new file mode 100644 index 000000000000..b3377deb893e --- /dev/null +++ b/tests/test_image_processor.py @@ -0,0 +1,63 @@ +# coding=utf-8 +# Copyright 2023 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
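
# A minimal sketch of the round trip these tests exercise, using the encode/decode
# API defined in image_processor.py above (do_resize and do_normalize are disabled
# so the round trip should be exact; the sample shape here is an arbitrary choice):

import numpy as np
from diffusers import VaeImageProcessor

processor = VaeImageProcessor(do_resize=False, do_normalize=False)
sample = np.random.rand(1, 64, 64, 3).astype(np.float32)  # NHWC floats in [0, 1]
tensor = processor.encode(sample)                          # -> torch.Tensor, NCHW layout
restored = processor.decode(tensor, output_type="np")      # -> back to NHWC numpy
assert np.abs(sample - restored).max() < 1e-6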
+ +import unittest +import torch +import numpy as np + +from diffusers import VaeImageProcessor + +class ImageProcessorTest(unittest.TestCase): + + @property + def dummy_sample(self): + batch_size = 4 + num_channels = 3 + height = 8 + width = 8 + + sample = torch.rand((batch_size, num_channels, height, width)) + + return sample + + def test_encode_input_pt(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + + input_pt = self.dummy_sample + out_pt = image_processor.decode( + image_processor.encode(input_pt), + output_type='pt') + assert np.abs(input_pt.cpu().numpy() - out_pt.cpu().numpy()).max() < 1e-6 + + def test_encode_input_np(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + + input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) + out_np = image_processor.decode( + image_processor.encode(input_np), + output_type='np') + assert np.abs(input_np - out_np).max() < 1e-6 + + def test_encode_input_pil(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + + input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) + input_pil = image_processor.numpy_to_pil(input_np) + + out_pil = image_processor.decode( + image_processor.encode(input_pil), + output_type='pil') + for i, o in zip(input_pil, out_pil): + assert np.abs(np.array(i) - np.array(o)).max() == 0 \ No newline at end of file From d82730d1281c38bf74d6ff76355b5684089df565 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Thu, 9 Mar 2023 06:43:48 -1000 Subject: [PATCH 02/45] Apply suggestions from code review Co-authored-by: Patrick von Platen --- src/diffusers/image_processor.py | 21 ++++++++++++------- .../pipeline_stable_diffusion_img2img.py | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 3887f1175737..2b5561e43558 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -50,7 +50,7 @@ def __init__( vae_scale_factor: int = 8, resample: str = "lanczos", do_normalize: bool = True, - ): + ): super().__init__() @staticmethod @@ -110,31 +110,38 @@ def encode( Preprocess the image input, accpet formats in PIL images, numpy arrays or pytorch tensors" """ # convert PIL or list of PIL into numpy - if isinstance(image, PIL.Image.Image): + supported_formats = [PIL.Image.Image, np.ndarray, torch.Tensor] + if isinstance(image, supported_formats): + image = [image] + elif isinstance(image, list) and all(isinstance(i, supported_formats) for i in image): + image = image + else: + raise ValueError("Raise nice error messages here that incorrect format is used.") image = [image] if isinstance(image[0], PIL.Image.Image): if self.do_resize: image = [self.resize(i) for i in image] image = [np.array(i).astype(np.float32) / 255.0 for i in image] + elif self.do_resize: + # Currently we only support resizing for PIL so in case np or torch is used AND resizing is activating (which it is by default) then let's do the following: - 1. 
Check if the image sizes are not a multiple of `self.vae_scale_factor` => If it's not the case we throw a nice error - if isinstance(image, np.ndarray): - image = self.numpy_to_pt(image) - elif isinstance(image[0], np.ndarray): + if isinstance(image[0], np.ndarray): image = self.numpy_to_pt(np.stack(image, axis=0)) elif not isinstance(image, torch.Tensor) and isinstance(image[0], torch.Tensor): image = torch.cat(image, dim=0) # expected range [0,1], normalize to [-1,1] + do_normalize = self.do_normalize if image.min() < 0: warnings.warn( "Passing `image` as torch tensor with value range in [-1,1] is deprecated. The expected value range for image tensor is [0,1] " f"when passing as pytorch tensor or numpy Array. You passed `image` with value range [{image.min()},{image.max()}]", FutureWarning, ) - self.do_normalize = False + do_normalize = False - if self.do_normalize: + if do_normalize: image = self.normalize(image) return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 526478cb46ad..b1754987adc1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -191,7 +191,7 @@ def __init__( vae_scale_factor =self.vae_scale_factor) self.register_to_config( requires_safety_checker=requires_safety_checker, - vae_feature_extractor = vae_feature_extractor) + ) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload def enable_sequential_cpu_offload(self, gpu_id=0): From d0d1437e4cca9f4254dfb1cee22a79c3f992a348 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 9 Mar 2023 21:46:42 +0000 Subject: [PATCH 03/45] add more tests --- src/diffusers/image_processor.py | 20 +++++++---- tests/test_image_processor.py | 57 ++++++++++++++++++++++---------- 2 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 2b5561e43558..ec60b4a1030d 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -74,6 +74,10 @@ def numpy_to_pt(images): """ Convert a numpy image to a pytorch tensor """ + if images.ndim ==3: + images = images[...,None] + elif images.ndim==5: + images = images.squeeze(0) images = torch.from_numpy(images.transpose(0, 3, 1, 2)) return images @@ -109,28 +113,30 @@ def encode( """ Preprocess the image input, accpet formats in PIL images, numpy arrays or pytorch tensors" """ - # convert PIL or list of PIL into numpy - supported_formats = [PIL.Image.Image, np.ndarray, torch.Tensor] + supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) if isinstance(image, supported_formats): image = [image] elif isinstance(image, list) and all(isinstance(i, supported_formats) for i in image): image = image else: - raise ValueError("Raise nice error messages here that incorrect format is used.") - image = [image] - + raise ValueError(f"incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor") + if isinstance(image[0], PIL.Image.Image): if self.do_resize: image = [self.resize(i) for i in image] image = [np.array(i).astype(np.float32) / 255.0 for i in image] - elif self.do_resize: - # Currently we only support resizing for PIL so in case np or torch is used AND resizing is activating (which it is by default) then let's do the following: - 
1. Check if the image sizes are not a multiple of `self.vae_scale_factor` => If it's not the case we throw a nice error if isinstance(image[0], np.ndarray): image = self.numpy_to_pt(np.stack(image, axis=0)) elif not isinstance(image, torch.Tensor) and isinstance(image[0], torch.Tensor): image = torch.cat(image, dim=0) + if image.ndim==5: + image = image.squeeze(0) + _, _, height, width = image.shape + if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): + raise ValueError(f"the height and width of image have to be divisible by {self.vae_scale_factor} but are {height} and {width}.") + # expected range [0,1], normalize to [-1,1] do_normalize = self.do_normalize if image.min() < 0: diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py index b3377deb893e..d3e683988d9c 100644 --- a/tests/test_image_processor.py +++ b/tests/test_image_processor.py @@ -17,13 +17,16 @@ import torch import numpy as np +import PIL + from diffusers import VaeImageProcessor + class ImageProcessorTest(unittest.TestCase): @property def dummy_sample(self): - batch_size = 4 + batch_size = 1 num_channels = 3 height = 8 width = 8 @@ -31,24 +34,41 @@ def dummy_sample(self): sample = torch.rand((batch_size, num_channels, height, width)) return sample - + + def to_np(self, image): + if isinstance(image[0], PIL.Image.Image): + return np.stack([np.array(i) for i in image],axis=0) + elif isinstance(image, torch.Tensor): + return image.cpu().numpy().transpose(0, 2, 3, 1) + return image + def test_encode_input_pt(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) - + input_pt = self.dummy_sample - out_pt = image_processor.decode( - image_processor.encode(input_pt), - output_type='pt') - assert np.abs(input_pt.cpu().numpy() - out_pt.cpu().numpy()).max() < 1e-6 + input_np = self.to_np(input_pt) + + for output_type in ['pt','np','pil']: + out = image_processor.decode( + image_processor.encode(input_pt), + output_type=output_type, + ) + out_np = self.to_np(out) + in_np = (input_np * 255).round() if output_type == 'pil' else input_np + assert np.abs(in_np - out_np).max() < 1e-6, f"decoded output does not match input for output_type {output_type}" def test_encode_input_np(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) - input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) - out_np = image_processor.decode( - image_processor.encode(input_np), - output_type='np') - assert np.abs(input_np - out_np).max() < 1e-6 + + for output_type in ['pt','np','pil']: + out = image_processor.decode( + image_processor.encode(input_np), + output_type=output_type) + + out_np = self.to_np(out) + in_np = (input_np * 255).round() if output_type == 'pil' else input_np + assert np.abs(in_np - out_np).max() < 1e-6, f"decoded output does not match input for output_type {output_type}" def test_encode_input_pil(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) @@ -56,8 +76,11 @@ def test_encode_input_pil(self): input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) input_pil = image_processor.numpy_to_pil(input_np) - out_pil = image_processor.decode( - image_processor.encode(input_pil), - output_type='pil') - for i, o in zip(input_pil, out_pil): - assert np.abs(np.array(i) - np.array(o)).max() == 0 \ No newline at end of file + for output_type in ['pt','np','pil']: + out = image_processor.decode( + image_processor.encode(input_pil), + output_type=output_type) + for i, o in zip(input_pil, 
out): + in_np = np.array(i) + out_np = self.to_np(out) if output_type == 'pil' else (self.to_np(out) * 255).round() + assert np.abs(in_np - out_np).max() < 1e-6, f"decoded output does not match input for output_type {output_type}" \ No newline at end of file From da62e8d3a3a1c85ae891fc314640508bd9858c3d Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 9 Mar 2023 21:48:38 +0000 Subject: [PATCH 04/45] make style --- src/diffusers/__init__.py | 3 +- src/diffusers/image_processor.py | 65 ++++++++++--------- .../pipeline_stable_diffusion_img2img.py | 13 ++-- tests/test_image_processor.py | 51 ++++++++------- 4 files changed, 66 insertions(+), 66 deletions(-) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index bde1b37c858f..cebe6d9bcfaf 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -1,6 +1,7 @@ __version__ = "0.15.0.dev0" from .configuration_utils import ConfigMixin +from .image_processor import VaeImageProcessor from .utils import ( OptionalDependencyNotAvailable, is_flax_available, @@ -16,7 +17,7 @@ is_unidecode_available, logging, ) -from .image_processor import VaeImageProcessor + try: if not is_onnx_available(): diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index ec60b4a1030d..c1f877ff694f 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Union, Optional +from typing import Union +import numpy as np import PIL -from PIL import Image import torch -import numpy as np +from PIL import Image -from .utils import PIL_INTERPOLATION, CONFIG_NAME from .configuration_utils import ConfigMixin, register_to_config +from .utils import CONFIG_NAME, PIL_INTERPOLATION + class VaeImageProcessor(ConfigMixin): """ @@ -31,7 +32,7 @@ class VaeImageProcessor(ConfigMixin): Args: do_resize (`bool`, *optional*, defaults to `True`): - Whether to resize the image's (height, width) dimensions to the specified `size`. + Whether to resize the image's (height, width) dimensions to the specified `size`. `do_resize` in the `preprocess` method. vae_scale_factor (`int`, *optional*, defaults to `8`): scale factor in VAE, if do_resize is True, the image will be automatically resized to multipls of vae_scale_factor @@ -40,7 +41,7 @@ class VaeImageProcessor(ConfigMixin): do_normalize (`bool`, *optional*, defaults to `True`): Whether to normalize the image to [-1,1] """ - + config_name = CONFIG_NAME @register_to_config @@ -68,15 +69,15 @@ def numpy_to_pil(images): pil_images = [Image.fromarray(image) for image in images] return pil_images - + @staticmethod def numpy_to_pt(images): """ Convert a numpy image to a pytorch tensor """ - if images.ndim ==3: - images = images[...,None] - elif images.ndim==5: + if images.ndim == 3: + images = images[..., None] + elif images.ndim == 5: images = images.squeeze(0) images = torch.from_numpy(images.transpose(0, 3, 1, 2)) return images @@ -95,7 +96,7 @@ def normalize(images): Normalize an image array to [-1,1] """ return 2.0 * images - 1.0 - + def resize(self, images: PIL.Image.Image) -> PIL.Image.Image: """ Resize an PIL image. 
Both height and width will be resized to integer multiple of vae_scale_factor @@ -106,21 +107,22 @@ def resize(self, images: PIL.Image.Image) -> PIL.Image.Image: return images def encode( - self, + self, image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray], ) -> torch.Tensor: - """ Preprocess the image input, accpet formats in PIL images, numpy arrays or pytorch tensors" """ - supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) + supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) if isinstance(image, supported_formats): image = [image] elif isinstance(image, list) and all(isinstance(i, supported_formats) for i in image): image = image else: - raise ValueError(f"incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor") - + raise ValueError( + "incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor" + ) + if isinstance(image[0], PIL.Image.Image): if self.do_resize: image = [self.resize(i) for i in image] @@ -130,44 +132,43 @@ def encode( image = self.numpy_to_pt(np.stack(image, axis=0)) elif not isinstance(image, torch.Tensor) and isinstance(image[0], torch.Tensor): image = torch.cat(image, dim=0) - - if image.ndim==5: + + if image.ndim == 5: image = image.squeeze(0) _, _, height, width = image.shape if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): - raise ValueError(f"the height and width of image have to be divisible by {self.vae_scale_factor} but are {height} and {width}.") + raise ValueError( + f"the height and width of image have to be divisible by {self.vae_scale_factor} but are {height} and {width}." + ) # expected range [0,1], normalize to [-1,1] do_normalize = self.do_normalize - if image.min() < 0: + if image.min() < 0: warnings.warn( "Passing `image` as torch tensor with value range in [-1,1] is deprecated. The expected value range for image tensor is [0,1] " f"when passing as pytorch tensor or numpy Array. 
You passed `image` with value range [{image.min()},{image.max()}]", FutureWarning, - ) + ) do_normalize = False - + if do_normalize: image = self.normalize(image) return image def decode( - self, + self, image, - output_type: str ='pil', - ): - - if output_type == 'pt': + output_type: str = "pil", + ): + if output_type == "pt": return image - + image = self.pt_to_numpy(image) - if output_type == 'np': + if output_type == "np": return image - elif output_type == 'pil': + elif output_type == "pil": return self.numpy_to_pil(image) else: raise ValueError(f"Unsupported output_type {output_type}.") - - \ No newline at end of file diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index b1754987adc1..34eeae444359 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -15,17 +15,16 @@ import inspect from typing import Callable, List, Optional, Union -import numpy as np import PIL import torch from packaging import version from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict +from ...image_processor import VaeImageProcessor from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( - PIL_INTERPOLATION, deprecate, is_accelerate_available, is_accelerate_version, @@ -36,7 +35,6 @@ from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker -from ...image_processor import VaeImageProcessor logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -175,7 +173,7 @@ def __init__( deprecate("sample_size<64", "1.0.0", deprecation_message, standard_warn=False) new_config = dict(unet.config) new_config["sample_size"] = 64 - unet._internal_dict = FrozenDict(new_config) + unet._internal_dict = FrozenDict(new_config) self.register_modules( vae=vae, text_encoder=text_encoder, @@ -187,11 +185,10 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - vae_feature_extractor = VaeImageProcessor( - vae_scale_factor =self.vae_scale_factor) + VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config( requires_safety_checker=requires_safety_checker, - ) + ) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload def enable_sequential_cpu_offload(self, gpu_id=0): @@ -697,7 +694,7 @@ def __call__( progress_bar.update() if callback is not None and i % callback_steps == 0: callback(i, t, latents) - + # 9. Post-processing image = self.decode_latents(latents) diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py index d3e683988d9c..6734611e98dd 100644 --- a/tests/test_image_processor.py +++ b/tests/test_image_processor.py @@ -14,16 +14,15 @@ # limitations under the License. 
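
# For reference, a sketch of the array layout convention the processor helpers
# convert between, mirroring numpy_to_pt and pt_to_numpy above (shapes arbitrary):

import numpy as np
import torch

nhwc = np.random.rand(2, 8, 8, 3).astype(np.float32)   # numpy: (batch, height, width, channels)
nchw = torch.from_numpy(nhwc.transpose(0, 3, 1, 2))    # torch: (batch, channels, height, width)
back = nchw.cpu().permute(0, 2, 3, 1).float().numpy()  # the inverse permutation
assert np.abs(nhwc - back).max() == 0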
import unittest -import torch -import numpy as np +import numpy as np import PIL +import torch from diffusers import VaeImageProcessor class ImageProcessorTest(unittest.TestCase): - @property def dummy_sample(self): batch_size = 1 @@ -37,50 +36,52 @@ def dummy_sample(self): def to_np(self, image): if isinstance(image[0], PIL.Image.Image): - return np.stack([np.array(i) for i in image],axis=0) + return np.stack([np.array(i) for i in image], axis=0) elif isinstance(image, torch.Tensor): return image.cpu().numpy().transpose(0, 2, 3, 1) return image def test_encode_input_pt(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) - + input_pt = self.dummy_sample input_np = self.to_np(input_pt) - - for output_type in ['pt','np','pil']: + + for output_type in ["pt", "np", "pil"]: out = image_processor.decode( image_processor.encode(input_pt), output_type=output_type, - ) + ) out_np = self.to_np(out) - in_np = (input_np * 255).round() if output_type == 'pil' else input_np - assert np.abs(in_np - out_np).max() < 1e-6, f"decoded output does not match input for output_type {output_type}" + in_np = (input_np * 255).round() if output_type == "pil" else input_np + assert ( + np.abs(in_np - out_np).max() < 1e-6 + ), f"decoded output does not match input for output_type {output_type}" def test_encode_input_np(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) - - for output_type in ['pt','np','pil']: - out = image_processor.decode( - image_processor.encode(input_np), - output_type=output_type) - + + for output_type in ["pt", "np", "pil"]: + out = image_processor.decode(image_processor.encode(input_np), output_type=output_type) + out_np = self.to_np(out) - in_np = (input_np * 255).round() if output_type == 'pil' else input_np - assert np.abs(in_np - out_np).max() < 1e-6, f"decoded output does not match input for output_type {output_type}" + in_np = (input_np * 255).round() if output_type == "pil" else input_np + assert ( + np.abs(in_np - out_np).max() < 1e-6 + ), f"decoded output does not match input for output_type {output_type}" def test_encode_input_pil(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) input_pil = image_processor.numpy_to_pil(input_np) - - for output_type in ['pt','np','pil']: - out = image_processor.decode( - image_processor.encode(input_pil), - output_type=output_type) + + for output_type in ["pt", "np", "pil"]: + out = image_processor.decode(image_processor.encode(input_pil), output_type=output_type) for i, o in zip(input_pil, out): in_np = np.array(i) - out_np = self.to_np(out) if output_type == 'pil' else (self.to_np(out) * 255).round() - assert np.abs(in_np - out_np).max() < 1e-6, f"decoded output does not match input for output_type {output_type}" \ No newline at end of file + out_np = self.to_np(out) if output_type == "pil" else (self.to_np(out) * 255).round() + assert ( + np.abs(in_np - out_np).max() < 1e-6 + ), f"decoded output does not match input for output_type {output_type}" From 98146d08b3537fca2c59335fdf211fa13274883a Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 9 Mar 2023 21:53:30 +0000 Subject: [PATCH 05/45] fix --- src/diffusers/image_processor.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index c1f877ff694f..ac10fb269fba 100644 --- 
a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings from typing import Union import numpy as np @@ -27,15 +28,13 @@ class VaeImageProcessor(ConfigMixin): """ Image Processor for VAE - [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` - function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. - Args: do_resize (`bool`, *optional*, defaults to `True`): - Whether to resize the image's (height, width) dimensions to the specified `size`. - `do_resize` in the `preprocess` method. + Whether to resize the image's (height, width) dimensions to the specified `size`. `do_resize` in the + `preprocess` method. vae_scale_factor (`int`, *optional*, defaults to `8`): - scale factor in VAE, if do_resize is True, the image will be automatically resized to multipls of vae_scale_factor + scale factor in VAE, if do_resize is True, the image will be automatically resized to multipls of + vae_scale_factor resample (`str`, *optional*, defaults to `lanczos`): Resampling filter to use if resizing the image. do_normalize (`bool`, *optional*, defaults to `True`): @@ -73,7 +72,7 @@ def numpy_to_pil(images): @staticmethod def numpy_to_pt(images): """ - Convert a numpy image to a pytorch tensor + Convert a numpy image to a pytorch tensor """ if images.ndim == 3: images = images[..., None] @@ -85,7 +84,7 @@ def numpy_to_pt(images): @staticmethod def pt_to_numpy(images): """ - Convert a numpy image to a pytorch tensor + Convert a numpy image to a pytorch tensor """ images = images.cpu().numpy().transpose(0, 2, 3, 1) return images From d223e8e7762f36f9faef83e51d6fb57aae67112a Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 9 Mar 2023 22:52:35 +0000 Subject: [PATCH 06/45] update img2mg --- src/diffusers/__init__.py | 2 +- .../pipeline_stable_diffusion_img2img.py | 49 +++++++++++++------ .../test_stable_diffusion_img2img.py | 2 +- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index cebe6d9bcfaf..c239b8da73cd 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -1,7 +1,6 @@ __version__ = "0.15.0.dev0" from .configuration_utils import ConfigMixin -from .image_processor import VaeImageProcessor from .utils import ( OptionalDependencyNotAvailable, is_flax_available, @@ -91,6 +90,7 @@ VQDiffusionScheduler, ) from .training_utils import EMAModel + from .image_processor import VaeImageProcessor try: if not (is_torch_available() and is_scipy_available()): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 34eeae444359..5be078ac16dc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -185,7 +185,7 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + self.vae_feature_extractor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config( requires_safety_checker=requires_safety_checker, ) @@ -403,10 +403,9 @@ def _encode_prompt( return prompt_embeds - # Copied from 
diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker def run_safety_checker(self, image, device, dtype): if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) + safety_checker_input = self.feature_extractor(self.vae_feature_extractor.numpy_to_pil(image), return_tensors="pt").to(device) image, has_nsfw_concept = self.safety_checker( images=image, clip_input=safety_checker_input.pixel_values.to(dtype) ) @@ -420,7 +419,7 @@ def decode_latents(self, latents): image = self.vae.decode(latents).sample image = (image / 2 + 0.5).clamp(0, 1) # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 - image = image.cpu().permute(0, 2, 3, 1).float().numpy() + #image = image.cpu().permute(0, 2, 3, 1).float().numpy() return image # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs @@ -694,16 +693,38 @@ def __call__( progress_bar.update() if callback is not None and i % callback_steps == 0: callback(i, t, latents) - - # 9. Post-processing - image = self.decode_latents(latents) - - # 10. Run safety checker - image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - - # 11. Convert to PIL - if output_type == "pil": - image = self.numpy_to_pil(image) + + if output_type is None: + output_type = 'np' + + if output_type == "latent": + image = latents + has_nsfw_concept = None + elif output_type == "pt": + # 8. Post-processing + image = self.decode_latents(latents) + has_nsfw_concept = None + elif output_type == "np": + # 8. Post-processing + image = self.decode_latents(latents) + + # 9. Run safety checker + image = self.vae_feature_extractor.decode(image, output_type='np') + image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + + elif output_type == 'pil': + # 8. Post-processing + image = self.decode_latents(latents) + + # 9. Run safety checker + image = self.vae_feature_extractor.decode(image, output_type='np') + image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + + # 10. 
Convert to PIL + image = self.vae_feature_extractor.numpy_to_pil(image) + + else: + raise ValueError(f"Unsupported output_type {output_type} ") # Offload last model to CPU if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 77dfa9be1d1e..a8cd62a1c198 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -106,7 +106,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs From 5eb759213c273ac2e0db6c11559b41629e1a38c5 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 9 Mar 2023 22:53:07 +0000 Subject: [PATCH 07/45] style --- src/diffusers/__init__.py | 2 +- .../pipeline_stable_diffusion_img2img.py | 24 ++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index c239b8da73cd..4315669cc459 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -32,6 +32,7 @@ except OptionalDependencyNotAvailable: from .utils.dummy_pt_objects import * # noqa F403 else: + from .image_processor import VaeImageProcessor from .models import ( AutoencoderKL, ControlNetModel, @@ -90,7 +91,6 @@ VQDiffusionScheduler, ) from .training_utils import EMAModel - from .image_processor import VaeImageProcessor try: if not (is_torch_available() and is_scipy_available()): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 5be078ac16dc..5df32865e8c3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -405,7 +405,9 @@ def _encode_prompt( def run_safety_checker(self, image, device, dtype): if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.vae_feature_extractor.numpy_to_pil(image), return_tensors="pt").to(device) + safety_checker_input = self.feature_extractor( + self.vae_feature_extractor.numpy_to_pil(image), return_tensors="pt" + ).to(device) image, has_nsfw_concept = self.safety_checker( images=image, clip_input=safety_checker_input.pixel_values.to(dtype) ) @@ -419,7 +421,7 @@ def decode_latents(self, latents): image = self.vae.decode(latents).sample image = (image / 2 + 0.5).clamp(0, 1) # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 - #image = image.cpu().permute(0, 2, 3, 1).float().numpy() + # image = image.cpu().permute(0, 2, 3, 1).float().numpy() return image # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs @@ -693,10 +695,10 @@ def __call__( progress_bar.update() if callback is not None and i % callback_steps == 0: callback(i, t, latents) - - if output_type is None: - output_type = 'np' - + + if output_type is None: + output_type = "np" + if output_type == "latent": image = latents has_nsfw_concept = None @@ -709,20 +711,20 @@ def __call__( image = self.decode_latents(latents) # 9. 
Run safety checker - image = self.vae_feature_extractor.decode(image, output_type='np') + image = self.vae_feature_extractor.decode(image, output_type="np") image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - - elif output_type == 'pil': + + elif output_type == "pil": # 8. Post-processing image = self.decode_latents(latents) # 9. Run safety checker - image = self.vae_feature_extractor.decode(image, output_type='np') + image = self.vae_feature_extractor.decode(image, output_type="np") image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) # 10. Convert to PIL image = self.vae_feature_extractor.numpy_to_pil(image) - + else: raise ValueError(f"Unsupported output_type {output_type} ") From af21a0d8fcb7726a498515d2f0a596e849480f04 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 9 Mar 2023 23:09:59 +0000 Subject: [PATCH 08/45] fix --- src/diffusers/image_processor.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index ac10fb269fba..f97f77860b4d 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -86,7 +86,7 @@ def pt_to_numpy(images): """ Convert a numpy image to a pytorch tensor """ - images = images.cpu().numpy().transpose(0, 2, 3, 1) + images = images.cpu().permute(0, 2, 3, 1).float().numpy() return images @staticmethod diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 5df32865e8c3..60a10cb37277 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -696,16 +696,15 @@ def __call__( if callback is not None and i % callback_steps == 0: callback(i, t, latents) - if output_type is None: - output_type = "np" - if output_type == "latent": image = latents has_nsfw_concept = None + elif output_type == "pt": # 8. Post-processing image = self.decode_latents(latents) has_nsfw_concept = None + elif output_type == "np": # 8. 
Post-processing image = self.decode_latents(latents) From 803c93e2140c738f6315a447d2d40b105936b280 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sun, 12 Mar 2023 23:59:05 +0000 Subject: [PATCH 09/45] apply feedbacks --- src/diffusers/image_processor.py | 18 ++--- .../pipeline_stable_diffusion_img2img.py | 53 +++++-------- .../test_stable_diffusion_img2img.py | 63 ++++++++++++--- tests/test_image_processor.py | 79 +++++++++++++++++-- 4 files changed, 153 insertions(+), 60 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index f97f77860b4d..d8edc8223cc8 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -76,8 +76,7 @@ def numpy_to_pt(images): """ if images.ndim == 3: images = images[..., None] - elif images.ndim == 5: - images = images.squeeze(0) + images = torch.from_numpy(images.transpose(0, 3, 1, 2)) return images @@ -105,7 +104,7 @@ def resize(self, images: PIL.Image.Image) -> PIL.Image.Image: images = images.resize((w, h), resample=PIL_INTERPOLATION[self.resample]) return images - def encode( + def preprocess( self, image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray], ) -> torch.Tensor: @@ -128,12 +127,11 @@ def encode( image = [np.array(i).astype(np.float32) / 255.0 for i in image] if isinstance(image[0], np.ndarray): - image = self.numpy_to_pt(np.stack(image, axis=0)) - elif not isinstance(image, torch.Tensor) and isinstance(image[0], torch.Tensor): - image = torch.cat(image, dim=0) + image = np.concatenate(image, axis=0) if image[0].ndim == 4 else np.stack(image, axis=0) + image = self.numpy_to_pt(image) + elif isinstance(image[0], torch.Tensor): + image = torch.cat(image, axis=0) if image[0].ndim == 4 else torch.stack(image, axis=0) - if image.ndim == 5: - image = image.squeeze(0) _, _, height, width = image.shape if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): raise ValueError( @@ -155,12 +153,12 @@ def encode( return image - def decode( + def postprocess( self, image, output_type: str = "pil", ): - if output_type == "pt": + if isinstance(image, torch.Tensor) and output_type == "pt": return image image = self.pt_to_numpy(image) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 60a10cb37277..444354417168 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -185,7 +185,7 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - self.vae_feature_extractor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config( requires_safety_checker=requires_safety_checker, ) @@ -404,15 +404,14 @@ def _encode_prompt( return prompt_embeds def run_safety_checker(self, image, device, dtype): - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor( - self.vae_feature_extractor.numpy_to_pil(image), return_tensors="pt" + feature_extractor_input = self.image_processor.postprocess(image, output_type='pil') + safety_checker_input = self.feature_extractor( + feature_extractor_input, + return_tensors="pt" ).to(device) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + image, 
has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) ) - else: - has_nsfw_concept = None return image, has_nsfw_concept # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents @@ -656,7 +655,7 @@ def __call__( ) # 4. Preprocess image - image = self.vae_feature_extractor.encode(image) + image = self.image_processor.preprocess(image) # 5. set timesteps self.scheduler.set_timesteps(num_inference_steps, device=device) @@ -695,37 +694,27 @@ def __call__( progress_bar.update() if callback is not None and i % callback_steps == 0: callback(i, t, latents) + + if output_type not in ["latent", "pt", "np", "pil"]: + deprecation_message = ( + f"the output_type {output_type} is outdated. Please make sure to set it to one of these instead: " + "`pil`, `np`, `pt`, `latent`" + ) + deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False ) + output_type = "np" if output_type == "latent": image = latents has_nsfw_concept = None - elif output_type == "pt": - # 8. Post-processing - image = self.decode_latents(latents) - has_nsfw_concept = None - - elif output_type == "np": - # 8. Post-processing - image = self.decode_latents(latents) + image = self.decode_latents(latents) - # 9. Run safety checker - image = self.vae_feature_extractor.decode(image, output_type="np") - image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - - elif output_type == "pil": - # 8. Post-processing - image = self.decode_latents(latents) - - # 9. Run safety checker - image = self.vae_feature_extractor.decode(image, output_type="np") + if self.safety_checker is not None: image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - - # 10. 
Convert to PIL - image = self.vae_feature_extractor.numpy_to_pil(image) - else: - raise ValueError(f"Unsupported output_type {output_type} ") + has_nsfw_concept = False + + image = self.image_processor.postprocess(image, output_type=output_type) # Offload last model to CPU if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index a8cd62a1c198..5e91766e23a5 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -29,6 +29,7 @@ PNDMScheduler, StableDiffusionImg2ImgPipeline, UNet2DConditionModel, + VaeImageProcessor, ) from diffusers.utils import floats_tensor, load_image, load_numpy, nightly, slow, torch_device from diffusers.utils.testing_utils import require_torch_gpu, skip_mps @@ -94,19 +95,33 @@ def get_dummy_components(self): } return components - def get_dummy_inputs(self, device, seed=0): + def get_dummy_inputs(self, device, seed=0, input_image_type='pt', output_type='np'): image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) if str(device).startswith("mps"): generator = torch.manual_seed(seed) else: generator = torch.Generator(device=device).manual_seed(seed) + + if input_image_type == 'pt': + input_image = image + elif input_image_type == 'np': + input_image = image.cpu().numpy().transpose(0, 2, 3, 1) + elif input_image_type == 'pil': + input_image = image.cpu().numpy().transpose(0, 2, 3, 1) + input_image = VaeImageProcessor.numpy_to_pil(input_image) + else: + raise ValueError(f"unsupported input_image_type {input_image_type}.") + + if output_type not in ['pt', 'np', 'pil']: + raise ValueError(f"unsupported output_type {output_type}") + inputs = { "prompt": "A painting of a squirrel eating a burger", - "image": image, + "image": input_image, "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "np", + "output_type": output_type, } return inputs @@ -122,7 +137,7 @@ def test_stable_diffusion_img2img_default_case(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218]) + expected_slice = np.array([0.46275955, 0.3977616, 0.42548066, 0.5823421, 0.50115615, 0.43968713, 0.41080174, 0.47410887, 0.42165133]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -140,8 +155,7 @@ def test_stable_diffusion_img2img_negative_prompt(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365]) - + expected_slice = np.array([0.4104152, 0.38498846, 0.41070235, 0.52090424, 0.47205922, 0.42849067, 0.41589636, 0.46834698, 0.4408132]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_multiple_init_images(self): @@ -158,7 +172,7 @@ def test_stable_diffusion_img2img_multiple_init_images(self): image_slice = image[-1, -3:, -3:, -1] assert image.shape == (2, 32, 32, 3) - expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689]) + expected_slice = np.array([0.46686086, 0.44393504, 0.5084481, 0.67471784, 0.55514234, 0.5346449, 0.63654804, 0.51626045, 0.46245307]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -177,7 
+191,7 @@ def test_stable_diffusion_img2img_k_lms(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203]) + expected_slice = np.array([0.4390018, 0.49910325, 0.43994197, 0.6633433, 0.56556225, 0.44274506, 0.58594346, 0.60113865, 0.52007025]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -197,7 +211,36 @@ def test_save_load_optional_components(self): def test_attention_slicing_forward_pass(self): return super().test_attention_slicing_forward_pass() + @skip_mps + def test_pt_np_pil_outputs_equivalent(self): + device = 'cpu' + components = self.get_dummy_components() + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + output_pt = sd_pipe(**self.get_dummy_inputs(device, output_type='pt'))[0] + output_np = sd_pipe(**self.get_dummy_inputs(device, output_type='np'))[0] + output_pil = sd_pipe(**self.get_dummy_inputs(device, output_type='pil'))[0] + assert np.abs(output_pt.cpu().numpy().transpose(0, 2, 3, 1) - output_np).max() <= 1e-4 + assert np.abs(np.array(output_pil[0]) - (output_np * 255).round()).max() <= 1e-4 + + @skip_mps + def test_image_types_consistent(self): + device = 'cpu' + components = self.get_dummy_components() + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + output_pt = sd_pipe(**self.get_dummy_inputs(device, input_image_type='pt'))[0] + output_np = sd_pipe(**self.get_dummy_inputs(device, input_image_type='np'))[0] + output_pil = sd_pipe(**self.get_dummy_inputs(device, input_image_type='pil'))[0] + + assert np.abs(output_pt - output_np).max() <= 1e-4 + assert np.abs(output_pil - output_np).max() <= 1e-2 + @slow @require_torch_gpu class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase): @@ -219,7 +262,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "num_inference_steps": 3, "strength": 0.75, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -426,7 +469,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "num_inference_steps": 50, "strength": 0.75, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py index 6734611e98dd..87ef11aa611e 100644 --- a/tests/test_image_processor.py +++ b/tests/test_image_processor.py @@ -40,16 +40,16 @@ def to_np(self, image): elif isinstance(image, torch.Tensor): return image.cpu().numpy().transpose(0, 2, 3, 1) return image - - def test_encode_input_pt(self): + + def test_vae_image_processor_pt(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) input_pt = self.dummy_sample input_np = self.to_np(input_pt) for output_type in ["pt", "np", "pil"]: - out = image_processor.decode( - image_processor.encode(input_pt), + out = image_processor.postprocess( + image_processor.preprocess(input_pt), output_type=output_type, ) out_np = self.to_np(out) @@ -58,12 +58,12 @@ def test_encode_input_pt(self): np.abs(in_np - out_np).max() < 1e-6 ), f"decoded output does not match input for output_type {output_type}" - def test_encode_input_np(self): + def test_vae_image_processor_np(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) input_np = 
self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) for output_type in ["pt", "np", "pil"]: - out = image_processor.decode(image_processor.encode(input_np), output_type=output_type) + out = image_processor.postprocess(image_processor.preprocess(input_np), output_type=output_type) out_np = self.to_np(out) in_np = (input_np * 255).round() if output_type == "pil" else input_np @@ -71,17 +71,80 @@ def test_encode_input_np(self): np.abs(in_np - out_np).max() < 1e-6 ), f"decoded output does not match input for output_type {output_type}" - def test_encode_input_pil(self): + def test_vae_image_processor_pil(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) input_np = self.dummy_sample.cpu().numpy().transpose(0, 2, 3, 1) input_pil = image_processor.numpy_to_pil(input_np) for output_type in ["pt", "np", "pil"]: - out = image_processor.decode(image_processor.encode(input_pil), output_type=output_type) + out = image_processor.postprocess(image_processor.preprocess(input_pil), output_type=output_type) for i, o in zip(input_pil, out): in_np = np.array(i) out_np = self.to_np(out) if output_type == "pil" else (self.to_np(out) * 255).round() assert ( np.abs(in_np - out_np).max() < 1e-6 ), f"decoded output does not match input for output_type {output_type}" + + def test_preprocess_input_3d(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + + input_pt_4d = self.dummy_sample + input_pt_3d = input_pt_4d.squeeze(0) + + out_pt_4d = image_processor.postprocess( + image_processor.preprocess(input_pt_4d), + output_type="np", + ) + out_pt_3d = image_processor.postprocess( + image_processor.preprocess(input_pt_3d), + output_type="np", + ) + + input_np_4d = self.to_np(self.dummy_sample) + input_np_3d = input_np_4d.squeeze(0) + + out_np_4d = image_processor.postprocess( + image_processor.preprocess(input_np_4d), + output_type="np", + ) + out_np_3d = image_processor.postprocess( + image_processor.preprocess(input_np_3d), + output_type="np", + ) + + assert np.abs(out_pt_4d - out_pt_3d).max() < 1e-6 + assert np.abs(out_np_4d - out_np_3d).max() < 1e-6 + + def test_preprocess_input_list(self): + image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + + input_pt_4d = self.dummy_sample + input_pt_list = list(input_pt_4d) + + out_pt_4d = image_processor.postprocess( + image_processor.preprocess(input_pt_4d), + output_type="np", + ) + + out_pt_list = image_processor.postprocess( + image_processor.preprocess(input_pt_list), + output_type="np", + ) + + input_np_4d = self.to_np(self.dummy_sample) + input_np_list = list(input_np_4d) + + out_np_4d = image_processor.postprocess( + image_processor.preprocess(input_pt_4d), + output_type="np", + ) + + out_np_list = image_processor.postprocess( + image_processor.preprocess(input_pt_list), + output_type="np", + ) + + assert np.abs(out_pt_4d - out_pt_list).max() < 1e-6 + assert np.abs(out_np_4d - out_np_list).max() < 1e-6 + From 5c6de08f0e147a9a8499892e1ddd9f3300b85878 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Mon, 13 Mar 2023 00:04:34 +0000 Subject: [PATCH 10/45] fix style --- .../pipeline_stable_diffusion_img2img.py | 15 ++--- .../test_stable_diffusion_img2img.py | 53 ++++++++++------- tests/test_image_processor.py | 59 +++++++++---------- 3 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 444354417168..25e580bcd096 100644 --- 
a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -404,14 +404,11 @@ def _encode_prompt( return prompt_embeds def run_safety_checker(self, image, device, dtype): - feature_extractor_input = self.image_processor.postprocess(image, output_type='pil') - safety_checker_input = self.feature_extractor( - feature_extractor_input, - return_tensors="pt" - ).to(device) + feature_extractor_input = self.image_processor.postprocess(image, output_type="pil") + safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device) image, has_nsfw_concept = self.safety_checker( images=image, clip_input=safety_checker_input.pixel_values.to(dtype) - ) + ) return image, has_nsfw_concept # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents @@ -694,13 +691,13 @@ def __call__( progress_bar.update() if callback is not None and i % callback_steps == 0: callback(i, t, latents) - + if output_type not in ["latent", "pt", "np", "pil"]: deprecation_message = ( f"the output_type {output_type} is outdated. Please make sure to set it to one of these instead: " "`pil`, `np`, `pt`, `latent`" ) - deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False ) + deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False) output_type = "np" if output_type == "latent": @@ -713,7 +710,7 @@ def __call__( image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) else: has_nsfw_concept = False - + image = self.image_processor.postprocess(image, output_type=output_type) # Offload last model to CPU diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 5e91766e23a5..504fc416e244 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -95,24 +95,24 @@ def get_dummy_components(self): } return components - def get_dummy_inputs(self, device, seed=0, input_image_type='pt', output_type='np'): + def get_dummy_inputs(self, device, seed=0, input_image_type="pt", output_type="np"): image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) if str(device).startswith("mps"): generator = torch.manual_seed(seed) else: generator = torch.Generator(device=device).manual_seed(seed) - - if input_image_type == 'pt': + + if input_image_type == "pt": input_image = image - elif input_image_type == 'np': + elif input_image_type == "np": input_image = image.cpu().numpy().transpose(0, 2, 3, 1) - elif input_image_type == 'pil': + elif input_image_type == "pil": input_image = image.cpu().numpy().transpose(0, 2, 3, 1) input_image = VaeImageProcessor.numpy_to_pil(input_image) else: raise ValueError(f"unsupported input_image_type {input_image_type}.") - - if output_type not in ['pt', 'np', 'pil']: + + if output_type not in ["pt", "np", "pil"]: raise ValueError(f"unsupported output_type {output_type}") inputs = { @@ -137,7 +137,9 @@ def test_stable_diffusion_img2img_default_case(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.46275955, 0.3977616, 0.42548066, 0.5823421, 0.50115615, 0.43968713, 0.41080174, 0.47410887, 0.42165133]) + expected_slice = np.array( + [0.46275955, 0.3977616, 0.42548066, 0.5823421, 
0.50115615, 0.43968713, 0.41080174, 0.47410887, 0.42165133] + ) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -155,7 +157,9 @@ def test_stable_diffusion_img2img_negative_prompt(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.4104152, 0.38498846, 0.41070235, 0.52090424, 0.47205922, 0.42849067, 0.41589636, 0.46834698, 0.4408132]) + expected_slice = np.array( + [0.4104152, 0.38498846, 0.41070235, 0.52090424, 0.47205922, 0.42849067, 0.41589636, 0.46834698, 0.4408132] + ) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_multiple_init_images(self): @@ -172,7 +176,9 @@ def test_stable_diffusion_img2img_multiple_init_images(self): image_slice = image[-1, -3:, -3:, -1] assert image.shape == (2, 32, 32, 3) - expected_slice = np.array([0.46686086, 0.44393504, 0.5084481, 0.67471784, 0.55514234, 0.5346449, 0.63654804, 0.51626045, 0.46245307]) + expected_slice = np.array( + [0.46686086, 0.44393504, 0.5084481, 0.67471784, 0.55514234, 0.5346449, 0.63654804, 0.51626045, 0.46245307] + ) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -191,7 +197,9 @@ def test_stable_diffusion_img2img_k_lms(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.4390018, 0.49910325, 0.43994197, 0.6633433, 0.56556225, 0.44274506, 0.58594346, 0.60113865, 0.52007025]) + expected_slice = np.array( + [0.4390018, 0.49910325, 0.43994197, 0.6633433, 0.56556225, 0.44274506, 0.58594346, 0.60113865, 0.52007025] + ) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -213,34 +221,35 @@ def test_attention_slicing_forward_pass(self): @skip_mps def test_pt_np_pil_outputs_equivalent(self): - device = 'cpu' + device = "cpu" components = self.get_dummy_components() sd_pipe = StableDiffusionImg2ImgPipeline(**components) sd_pipe = sd_pipe.to(device) sd_pipe.set_progress_bar_config(disable=None) - - output_pt = sd_pipe(**self.get_dummy_inputs(device, output_type='pt'))[0] - output_np = sd_pipe(**self.get_dummy_inputs(device, output_type='np'))[0] - output_pil = sd_pipe(**self.get_dummy_inputs(device, output_type='pil'))[0] + + output_pt = sd_pipe(**self.get_dummy_inputs(device, output_type="pt"))[0] + output_np = sd_pipe(**self.get_dummy_inputs(device, output_type="np"))[0] + output_pil = sd_pipe(**self.get_dummy_inputs(device, output_type="pil"))[0] assert np.abs(output_pt.cpu().numpy().transpose(0, 2, 3, 1) - output_np).max() <= 1e-4 assert np.abs(np.array(output_pil[0]) - (output_np * 255).round()).max() <= 1e-4 @skip_mps def test_image_types_consistent(self): - device = 'cpu' + device = "cpu" components = self.get_dummy_components() sd_pipe = StableDiffusionImg2ImgPipeline(**components) sd_pipe = sd_pipe.to(device) sd_pipe.set_progress_bar_config(disable=None) - - output_pt = sd_pipe(**self.get_dummy_inputs(device, input_image_type='pt'))[0] - output_np = sd_pipe(**self.get_dummy_inputs(device, input_image_type='np'))[0] - output_pil = sd_pipe(**self.get_dummy_inputs(device, input_image_type='pil'))[0] + + output_pt = sd_pipe(**self.get_dummy_inputs(device, input_image_type="pt"))[0] + output_np = sd_pipe(**self.get_dummy_inputs(device, input_image_type="np"))[0] + output_pil = sd_pipe(**self.get_dummy_inputs(device, input_image_type="pil"))[0] assert np.abs(output_pt - output_np).max() <= 1e-4 assert np.abs(output_pil - output_np).max() <= 1e-2 - + + @slow @require_torch_gpu class 
StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase): diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py index 87ef11aa611e..ce2483590668 100644 --- a/tests/test_image_processor.py +++ b/tests/test_image_processor.py @@ -40,7 +40,7 @@ def to_np(self, image): elif isinstance(image, torch.Tensor): return image.cpu().numpy().transpose(0, 2, 3, 1) return image - + def test_vae_image_processor_pt(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) @@ -93,58 +93,57 @@ def test_preprocess_input_3d(self): input_pt_3d = input_pt_4d.squeeze(0) out_pt_4d = image_processor.postprocess( - image_processor.preprocess(input_pt_4d), - output_type="np", - ) + image_processor.preprocess(input_pt_4d), + output_type="np", + ) out_pt_3d = image_processor.postprocess( - image_processor.preprocess(input_pt_3d), - output_type="np", - ) + image_processor.preprocess(input_pt_3d), + output_type="np", + ) input_np_4d = self.to_np(self.dummy_sample) input_np_3d = input_np_4d.squeeze(0) out_np_4d = image_processor.postprocess( - image_processor.preprocess(input_np_4d), - output_type="np", - ) + image_processor.preprocess(input_np_4d), + output_type="np", + ) out_np_3d = image_processor.postprocess( - image_processor.preprocess(input_np_3d), - output_type="np", - ) - + image_processor.preprocess(input_np_3d), + output_type="np", + ) + assert np.abs(out_pt_4d - out_pt_3d).max() < 1e-6 assert np.abs(out_np_4d - out_np_3d).max() < 1e-6 def test_preprocess_input_list(self): image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) - + input_pt_4d = self.dummy_sample input_pt_list = list(input_pt_4d) out_pt_4d = image_processor.postprocess( - image_processor.preprocess(input_pt_4d), - output_type="np", - ) + image_processor.preprocess(input_pt_4d), + output_type="np", + ) out_pt_list = image_processor.postprocess( - image_processor.preprocess(input_pt_list), - output_type="np", - ) - + image_processor.preprocess(input_pt_list), + output_type="np", + ) + input_np_4d = self.to_np(self.dummy_sample) input_np_list = list(input_np_4d) out_np_4d = image_processor.postprocess( - image_processor.preprocess(input_np_4d), - output_type="np", - ) + image_processor.preprocess(input_np_4d), + output_type="np", + ) out_np_list = image_processor.postprocess( - image_processor.preprocess(input_np_list), - output_type="np", - ) + image_processor.preprocess(input_np_list), + output_type="np", + ) assert np.abs(out_pt_4d - out_pt_list).max() < 1e-6 assert np.abs(out_np_4d - out_np_list).max() < 1e-6 - From e07a9bea6805f4ca9c844d1bb3be1cf4396d398e Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Mon, 13 Mar 2023 00:16:35 +0000 Subject: [PATCH 11/45] remove fixed copies on img2img preprocess --- .../pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py | 1 - .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 1 - .../stable_diffusion/pipeline_stable_diffusion_depth2img.py | 1 - .../stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py | 1 - 4 files changed, 4 deletions(-) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 1e7872e3b081..d6e448443d60 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -64,7 +64,6 @@ """ -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess
def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index e977071b9c6c..d54d0b7b8466 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -35,7 +35,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 6c02e06a6523..39df07e4e527 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -32,7 +32,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index b5a352c785ee..8d34466fc6d1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -169,7 +169,6 @@ class Pix2PixInversionPipelineOutput(BaseOutput): """ -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image From cd2721fddb43ef09ceedadc7da079a5761a012f3 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Mon, 13 Mar 2023 00:21:16 +0000 Subject: [PATCH 12/45] fix --- src/diffusers/pipelines/repaint/pipeline_repaint.py | 1 - .../stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py | 1 - .../pipeline_stable_diffusion_instruct_pix2pix.py | 1 - 3 files changed, 3 deletions(-) diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py index fabcd2610f43..4f449fddcca3 100644 --- a/src/diffusers/pipelines/repaint/pipeline_repaint.py +++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py @@ -28,7 +28,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def _preprocess_image(image: Union[List, PIL.Image.Image, torch.Tensor]): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 9123e5f3296d..32ba3fa901e8 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -31,7 +31,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64 def preprocess(image): if isinstance(image, 
torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 953df11aa4f7..35238ea6e57b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -38,7 +38,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image From 2c702f10af64573a63578842748e72d92792cf51 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:34:27 -1000 Subject: [PATCH 13/45] Update src/diffusers/image_processor.py Co-authored-by: Pedro Cuenca --- src/diffusers/image_processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index d8edc8223cc8..975f150727ef 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -30,8 +30,7 @@ class VaeImageProcessor(ConfigMixin): Args: do_resize (`bool`, *optional*, defaults to `True`): - Whether to resize the image's (height, width) dimensions to the specified `size`. `do_resize` in the - `preprocess` method. + Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. vae_scale_factor (`int`, *optional*, defaults to `8`): scale factor in VAE, if do_resize is True, the image will be automatically resized to multipls of vae_scale_factor From dc508d6ddd249ab06827985731809b8c21888962 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:34:54 -1000 Subject: [PATCH 14/45] Update src/diffusers/image_processor.py Co-authored-by: Pedro Cuenca --- src/diffusers/image_processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 975f150727ef..0ee19294efc9 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -32,8 +32,7 @@ class VaeImageProcessor(ConfigMixin): do_resize (`bool`, *optional*, defaults to `True`): Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. vae_scale_factor (`int`, *optional*, defaults to `8`): - scale factor in VAE, if do_resize is True, the image will be automatically resized to multipls of - vae_scale_factor + VAE scale factor. If `do_resize` is True, the image will be automatically resized to multiples of this factor. resample (`str`, *optional*, defaults to `lanczos`): Resampling filter to use if resizing the image. do_normalize (`bool`, *optional*, defaults to `True`): From 3475dec3d6b75225809f526a19b645e7b30c1b7f Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:35:08 -1000 Subject: [PATCH 15/45] Update src/diffusers/image_processor.py Co-authored-by: Pedro Cuenca --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 0ee19294efc9..379503e5eaf1 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -34,7 +34,7 @@ class VaeImageProcessor(ConfigMixin): vae_scale_factor (`int`, *optional*, defaults to `8`): VAE scale factor. 
If `do_resize` is True, the image will be automatically resized to multiples of this factor. resample (`str`, *optional*, defaults to `lanczos`): - Resampling filter to use if resizing the image. + Resampling filter to use when resizing the image. do_normalize (`bool`, *optional*, defaults to `True`): Whether to normalize the image to [-1,1] """ From e3a0b133e599b781d9367fbb8499771995321c5a Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:35:22 -1000 Subject: [PATCH 16/45] Update src/diffusers/image_processor.py Co-authored-by: Pedro Cuenca --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 379503e5eaf1..da9c5e8a32d0 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -95,7 +95,7 @@ def normalize(images): def resize(self, images: PIL.Image.Image) -> PIL.Image.Image: """ - Resize an PIL image. Both height and width will be resized to integer multiple of vae_scale_factor + Resize a PIL image. Both height and width will be downscaled to the next integer multiple of `vae_scale_factor` """ w, h = images.size w, h = map(lambda x: x - x % self.vae_scale_factor, (w, h)) # resize to integer multiple of vae_scale_factor From 63b2418777480d6faec2847b1110d3070511d2ce Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:35:35 -1000 Subject: [PATCH 17/45] Update src/diffusers/image_processor.py Co-authored-by: Patrick von Platen --- src/diffusers/image_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index da9c5e8a32d0..3d3a589251b2 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -113,7 +113,6 @@ def preprocess( if isinstance(image, supported_formats): image = [image] elif isinstance(image, list) and all(isinstance(i, supported_formats) for i in image): - image = image else: raise ValueError( "incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor" From 2847d4b031467ea81ee1e5af5c401dfafdb912cd Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:38:09 -1000 Subject: [PATCH 18/45] Update src/diffusers/image_processor.py Co-authored-by: Patrick von Platen --- src/diffusers/image_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 3d3a589251b2..7b968e96a3ae 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -112,7 +112,6 @@ def preprocess( supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) if isinstance(image, supported_formats): image = [image] - elif isinstance(image, list) and all(isinstance(i, supported_formats) for i in image): else: raise ValueError( "incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor" From f6e5af05a410f0a2436efb2ac841cb49a670279c Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:38:19 -1000 Subject: [PATCH 19/45] Update src/diffusers/image_processor.py Co-authored-by: Patrick von Platen --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 7b968e96a3ae..398ab6856c93 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -112,7 +112,7 @@ def preprocess( supported_formats = 
(PIL.Image.Image, np.ndarray, torch.Tensor) if isinstance(image, supported_formats): image = [image] - else: + elif not (isinstance(image, list) and all(isinstance(i, supported_formats)): raise ValueError( "incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor" ) From 771f6c0ae94292ac7a5ea5d16b93386183575efd Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:38:32 -1000 Subject: [PATCH 20/45] Update src/diffusers/image_processor.py Co-authored-by: Patrick von Platen --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 398ab6856c93..b9ad5ff30d8f 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -114,7 +114,7 @@ def preprocess( image = [image] elif not (isinstance(image, list) and all(isinstance(i, supported_formats)): raise ValueError( - "incorrect image format is used - currently we only support PIL image, numpy array or pytorch tensor" + f"Input is in incorrect format: {[type(i) for i in image)}. Currently, we only support {', '.join(supported_formats)}" ) if isinstance(image[0], PIL.Image.Image): From 26e95145bb01a57ce58301af8b66f043ba57661f Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 13 Mar 2023 14:39:10 -1000 Subject: [PATCH 21/45] Update src/diffusers/image_processor.py Co-authored-by: Patrick von Platen --- src/diffusers/image_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index b9ad5ff30d8f..f796ac6cc276 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -129,6 +129,7 @@ def preprocess( image = torch.cat(image, axis=0) if image[0].ndim == 4 else torch.stack(image, axis=0) _, _, height, width = image.shape + if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): raise ValueError( f"the height and width of image have to be divisible by {self.vae_scale_factor} but are {height} and {width}." From 7c9b9f74d769154eef42de436ae12af35c424650 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 00:52:01 +0000 Subject: [PATCH 22/45] fix typos --- src/diffusers/image_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index f796ac6cc276..1d20c45bd18e 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -112,9 +112,9 @@ def preprocess( supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) if isinstance(image, supported_formats): image = [image] - elif not (isinstance(image, list) and all(isinstance(i, supported_formats)): + elif not (isinstance(image, list) and all(isinstance(i, supported_formats) for i in image)): raise ValueError( - f"Input is in incorrect format: {[type(i) for i in image)}. Currently, we only support {', '.join(supported_formats)}" + f"Input is in incorrect format: {[type(i) for i in image]}. 
Currently, we only support {', '.join(str(f) for f in supported_formats)}" ) if isinstance(image[0], PIL.Image.Image): From f009e9781fd048819cd645724301020b4d53483e Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 00:54:16 +0000 Subject: [PATCH 23/45] add back preprocess function --- .../pipeline_stable_diffusion_img2img.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 25e580bcd096..691b946dd3c2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -68,6 +68,27 @@ """ +def preprocess(image): + if isinstance(image, torch.Tensor): + return image + elif isinstance(image, PIL.Image.Image): + image = [image] + + if isinstance(image[0], PIL.Image.Image): + w, h = image[0].size + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 + + image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image] + image = np.concatenate(image, axis=0) + image = np.array(image).astype(np.float32) / 255.0 + image = image.transpose(0, 3, 1, 2) + image = 2.0 * image - 1.0 + image = torch.from_numpy(image) + elif isinstance(image[0], torch.Tensor): + image = torch.cat(image, dim=0) + return image + + class StableDiffusionImg2ImgPipeline(DiffusionPipeline): r""" Pipeline for text-guided image to image generation using Stable Diffusion. From e2f7cf4243b57449e768d4b692a50248d4a88dc7 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 00:55:09 +0000 Subject: [PATCH 24/45] Revert "remove fixed copies on img2img preprocess" This reverts commit e07a9bea6805f4ca9c844d1bb3be1cf4396d398e.
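Restoring the markers alongside the re-added module-level preprocess helpers keeps the duplicates under the repo's copy mechanism: the `# Copied from` comment is what lets `make fix-copies` regenerate each duplicate from the img2img original when they drift apart. An illustrative sketch of the convention (not code introduced by this revert):

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess
    def preprocess(image):
        ...  # body is kept identical to the img2img source by the copy checker

Dropping the marker, as the reverted commit did, silently opts a copy out of that synchronization, which is why the comments come back here.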
--- .../pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py | 1 + .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 1 + .../stable_diffusion/pipeline_stable_diffusion_depth2img.py | 1 + .../stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py | 1 + 4 files changed, 4 insertions(+) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index d6e448443d60..1e7872e3b081 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -64,6 +64,7 @@ """ +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index d54d0b7b8466..e977071b9c6c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -35,6 +35,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 39df07e4e527..6c02e06a6523 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -32,6 +32,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index 8d34466fc6d1..b5a352c785ee 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -169,6 +169,7 @@ class Pix2PixInversionPipelineOutput(BaseOutput): """ +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image From c1569be82a303330a81bf6c4e68811036891c963 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 00:57:17 +0000 Subject: [PATCH 25/45] Revert "fix" This reverts commit cd2721fddb43ef09ceedadc7da079a5761a012f3. 
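This also restores the ONNX pipeline's marker with its `with 8->64` suffix, which asks the copy checker to apply that textual substitution when regenerating the function. A sketch of the practical effect on the helper's PIL branch (only the modulus differs from the img2img source):

    # img2img original: round (w, h) down to multiples of 8
    w, h = map(lambda x: x - x % 8, (w, h))
    # ONNX copy after the 8->64 substitution: multiples of 64
    w, h = map(lambda x: x - x % 64, (w, h))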
--- src/diffusers/pipelines/repaint/pipeline_repaint.py | 1 + .../stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py | 1 + .../pipeline_stable_diffusion_instruct_pix2pix.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py index 4f449fddcca3..fabcd2610f43 100644 --- a/src/diffusers/pipelines/repaint/pipeline_repaint.py +++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py @@ -28,6 +28,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def _preprocess_image(image: Union[List, PIL.Image.Image, torch.Tensor]): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 32ba3fa901e8..9123e5f3296d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -31,6 +31,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64 def preprocess(image): if isinstance(image, torch.Tensor): return image diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 35238ea6e57b..953df11aa4f7 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -38,6 +38,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): if isinstance(image, torch.Tensor): return image From 9cf2c0bc29ce3cea1f87b7fa8db4747d8296f3c5 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 01:14:29 +0000 Subject: [PATCH 26/45] revert change in expected slice --- .../test_stable_diffusion_img2img.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 504fc416e244..77c943d4d30f 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -137,9 +137,7 @@ def test_stable_diffusion_img2img_default_case(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array( - [0.46275955, 0.3977616, 0.42548066, 0.5823421, 0.50115615, 0.43968713, 0.41080174, 0.47410887, 0.42165133] - ) + expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -157,9 +155,8 @@ def test_stable_diffusion_img2img_negative_prompt(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array( - [0.4104152, 0.38498846, 0.41070235, 0.52090424, 0.47205922, 0.42849067, 0.41589636, 0.46834698, 0.4408132] - ) + expected_slice = np.array([0.4065, 
0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_multiple_init_images(self): @@ -176,9 +173,7 @@ def test_stable_diffusion_img2img_multiple_init_images(self): image_slice = image[-1, -3:, -3:, -1] assert image.shape == (2, 32, 32, 3) - expected_slice = np.array( - [0.46686086, 0.44393504, 0.5084481, 0.67471784, 0.55514234, 0.5346449, 0.63654804, 0.51626045, 0.46245307] - ) + expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @@ -197,9 +192,7 @@ def test_stable_diffusion_img2img_k_lms(self): image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array( - [0.4390018, 0.49910325, 0.43994197, 0.6633433, 0.56556225, 0.44274506, 0.58594346, 0.60113865, 0.52007025] - ) + expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 From 1fe112cba66085945e3dacb891f630e8beee68c3 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 01:38:59 +0000 Subject: [PATCH 27/45] fix img2img tests --- .../stable_diffusion/test_stable_diffusion_img2img.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 77c943d4d30f..5ab341b8a892 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -129,6 +129,7 @@ def test_stable_diffusion_img2img_default_case(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False) sd_pipe = sd_pipe.to(device) sd_pipe.set_progress_bar_config(disable=None) @@ -145,6 +146,7 @@ def test_stable_diffusion_img2img_negative_prompt(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False) sd_pipe = sd_pipe.to(device) sd_pipe.set_progress_bar_config(disable=None) @@ -163,6 +165,7 @@ def test_stable_diffusion_img2img_multiple_init_images(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False) sd_pipe = sd_pipe.to(device) sd_pipe.set_progress_bar_config(disable=None) @@ -184,6 +187,7 @@ def test_stable_diffusion_img2img_k_lms(self): beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" ) sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False) sd_pipe = sd_pipe.to(device) sd_pipe.set_progress_bar_config(disable=None) From 2f4cadec808635568f99b6dd412e4ed222099ec3 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 01:42:34 +0000 Subject: [PATCH 28/45] make style --- 
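Note for reviewers: besides the reflow, this pulls in the `numpy` and `PIL_INTERPOLATION` imports that the re-added `preprocess` helper relies on. For reference, the round-trip the image-processor tests exercise looks roughly like the sketch below; `sample` is a hypothetical stand-in for any NCHW batch with values in [0, 1], not a name taken from this patch.

    import torch
    from diffusers import VaeImageProcessor

    processor = VaeImageProcessor(do_resize=False, do_normalize=False)
    sample = torch.rand(1, 3, 32, 32)  # hypothetical [0, 1] batch
    out = processor.postprocess(processor.preprocess(sample), output_type="np")
    assert out.shape == (1, 32, 32, 3)  # NHWC numpy array, values unchanged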
src/diffusers/image_processor.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 1d20c45bd18e..52f84a98bbd8 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -129,7 +129,7 @@ def preprocess( image = torch.cat(image, axis=0) if image[0].ndim == 4 else torch.stack(image, axis=0) _, _, height, width = image.shape - + if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): raise ValueError( f"the height and width of image have to be divisible by {self.vae_scale_factor} but are {height} and {width}." diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 691b946dd3c2..5f9771242452 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -15,6 +15,7 @@ import inspect from typing import Callable, List, Optional, Union +import numpy as np import PIL import torch from packaging import version @@ -25,6 +26,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( + PIL_INTERPOLATION, deprecate, is_accelerate_available, is_accelerate_version, @@ -88,7 +90,7 @@ def preprocess(image): image = torch.cat(image, dim=0) return image - + class StableDiffusionImg2ImgPipeline(DiffusionPipeline): r""" Pipeline for text-guided image to image generation using Stable Diffusion. From 90e0539f6ce2952f4349a4ceabfcd6f6cfff889d Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 01:45:37 +0000 Subject: [PATCH 29/45] remove #fixed copies on img2img init method --- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 5f9771242452..63584c3fbb8d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -120,7 +120,6 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): """ _optional_components = ["safety_checker", "feature_extractor"] - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.__init__ def __init__( self, vae: AutoencoderKL, From 983f4e924818babaaa098a4f824b985f1443ee36 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 01:48:39 +0000 Subject: [PATCH 30/45] remove #copy on img2img decode_latents --- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 63584c3fbb8d..684843517af6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -433,7 +433,6 @@ def run_safety_checker(self, image, device, dtype): ) return image, has_nsfw_concept - # Copied from
diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents def decode_latents(self, latents): latents = 1 / self.vae.config.scaling_factor * latents image = self.vae.decode(latents).sample From 8ab5015a85616d5c7b938311ece214a16e42fbb7 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 03:20:49 +0000 Subject: [PATCH 31/45] update alt_img2img --- .../pipeline_alt_diffusion_img2img.py | 47 ++++++++++++------- src/diffusers/utils/dummy_pt_objects.py | 15 ++++++ .../test_alt_diffusion_img2img.py | 4 +- 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 1e7872e3b081..c9d64b934ab0 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -24,6 +24,7 @@ from diffusers.utils import is_accelerate_available, is_accelerate_version from ...configuration_utils import FrozenDict +from ...image_processor import VaeImageProcessor from ...models import AutoencoderKL, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers from ...utils import PIL_INTERPOLATION, deprecate, logging, randn_tensor, replace_example_docstring @@ -192,7 +193,6 @@ def __init__( new_config = dict(unet.config) new_config["sample_size"] = 64 unet._internal_dict = FrozenDict(new_config) - self.register_modules( vae=vae, text_encoder=text_encoder, @@ -203,7 +203,11 @@ def __init__( feature_extractor=feature_extractor, ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - self.register_to_config(requires_safety_checker=requires_safety_checker) + + self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + self.register_to_config( + requires_safety_checker=requires_safety_checker, + ) def enable_sequential_cpu_offload(self, gpu_id=0): r""" @@ -415,13 +419,11 @@ def _encode_prompt( return prompt_embeds def run_safety_checker(self, image, device, dtype): - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(dtype) - ) - else: - has_nsfw_concept = None + feature_extractor_input = self.image_processor.postprocess(image, output_type="pil") + safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) return image, has_nsfw_concept def decode_latents(self, latents): @@ -429,7 +431,7 @@ def decode_latents(self, latents): image = self.vae.decode(latents).sample image = (image / 2 + 0.5).clamp(0, 1) # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 - image = image.cpu().permute(0, 2, 3, 1).float().numpy() + # image = image.cpu().permute(0, 2, 3, 1).float().numpy() return image def prepare_extra_step_kwargs(self, generator, eta): @@ -663,7 +665,7 @@ def __call__( ) # 4. Preprocess image - image = preprocess(image) + image = self.image_processor.preprocess(image) # 5. 
set timesteps self.scheduler.set_timesteps(num_inference_steps, device=device) @@ -703,15 +705,26 @@ def __call__( if callback is not None and i % callback_steps == 0: callback(i, t, latents) - # 9. Post-processing + if output_type not in ["latent", "pt", "np", "pil"]: + deprecation_message = ( + f"the output_type {output_type} is outdated. Please make sure to set it to one of these instead: " + "`pil`, `np`, `pt`, `latent`" + ) + deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False) + output_type = "np" + + if output_type == "latent": + image = latents + has_nsfw_concept = None + image = self.decode_latents(latents) - # 10. Run safety checker - image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + if self.safety_checker is not None: + image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + else: + has_nsfw_concept = False - # 11. Convert to PIL - if output_type == "pil": - image = self.numpy_to_pil(image) + image = self.image_processor.postprocess(image, output_type=output_type) # Offload last model to CPU if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index c731a1f1ddf3..1fcfb91d72e0 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -2,6 +2,21 @@ from ..utils import DummyObject, requires_backends +class VaeImageProcessor(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class AutoencoderKL(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py index d2745115af1c..9663f509dbc5 100644 --- a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py +++ b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py @@ -21,7 +21,7 @@ import torch from transformers import XLMRobertaTokenizer -from diffusers import AltDiffusionImg2ImgPipeline, AutoencoderKL, PNDMScheduler, UNet2DConditionModel +from diffusers import AltDiffusionImg2ImgPipeline, AutoencoderKL, PNDMScheduler, UNet2DConditionModel, VaeImageProcessor from diffusers.pipelines.alt_diffusion.modeling_roberta_series import ( RobertaSeriesConfig, RobertaSeriesModelWithTransformation, @@ -128,6 +128,7 @@ def test_stable_diffusion_img2img_default_case(self): safety_checker=None, feature_extractor=self.dummy_extractor, ) + alt_pipe.image_processor = VaeImageProcessor(vae_scale_factor=alt_pipe.vae_scale_factor, do_normalize=False) alt_pipe = alt_pipe.to(device) alt_pipe.set_progress_bar_config(disable=None) @@ -191,6 +192,7 @@ def test_stable_diffusion_img2img_fp16(self): safety_checker=None, feature_extractor=self.dummy_extractor, ) + alt_pipe.image_processor = VaeImageProcessor(vae_scale_factor=alt_pipe.vae_scale_factor, do_normalize=False) alt_pipe = alt_pipe.to(torch_device) alt_pipe.set_progress_bar_config(disable=None) From 4cc2d0e3ab576b7b4c006c4d2ccb690e982118ce Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 03:23:16 +0000 Subject: [PATCH 32/45] style --- .../pipelines/altdiffusion/test_alt_diffusion_img2img.py | 8 +++++++- 1 file changed, 7 
insertions(+), 1 deletion(-) diff --git a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py index 9663f509dbc5..10a3ef777b0e 100644 --- a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py +++ b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py @@ -21,7 +21,13 @@ import torch from transformers import XLMRobertaTokenizer -from diffusers import AltDiffusionImg2ImgPipeline, AutoencoderKL, PNDMScheduler, UNet2DConditionModel, VaeImageProcessor +from diffusers import ( + AltDiffusionImg2ImgPipeline, + AutoencoderKL, + PNDMScheduler, + UNet2DConditionModel, + VaeImageProcessor, +) from diffusers.pipelines.alt_diffusion.modeling_roberta_series import ( RobertaSeriesConfig, RobertaSeriesModelWithTransformation, From d919e695f4b56dea6e50016019f7eac5f4de736e Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 04:14:22 +0000 Subject: [PATCH 33/45] deprecate preprocess --- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 684843517af6..f8a55d026248 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -71,6 +71,11 @@ def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): From daa3d32da4edf6fb30c91f25de2230656ede6ee2 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 04:23:07 +0000 Subject: [PATCH 34/45] style + copy --- .../alt_diffusion/pipeline_alt_diffusion_img2img.py | 6 ++++++ src/diffusers/pipelines/repaint/pipeline_repaint.py | 6 ++++++ .../stable_diffusion/pipeline_cycle_diffusion.py | 6 ++++++ .../pipeline_onnx_stable_diffusion_img2img.py | 5 +++++ .../pipeline_stable_diffusion_depth2img.py | 6 ++++++ .../pipeline_stable_diffusion_img2img.py | 9 +++++---- .../pipeline_stable_diffusion_instruct_pix2pix.py | 6 ++++++ .../pipeline_stable_diffusion_pix2pix_zero.py | 6 ++++++ 8 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index c9d64b934ab0..656223c04616 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect +import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -67,6 +68,11 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. 
Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py index fabcd2610f43..5f4a65f43f17 100644 --- a/src/diffusers/pipelines/repaint/pipeline_repaint.py +++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py @@ -13,6 +13,7 @@ # limitations under the License. +import warnings from typing import List, Optional, Tuple, Union import numpy as np @@ -30,6 +31,11 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def _preprocess_image(image: Union[List, PIL.Image.Image, torch.Tensor]): + warnings.warn( + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index e977071b9c6c..43751de89d98 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect +import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -37,6 +38,11 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 9123e5f3296d..3391e61e33d8 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect +import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -33,6 +34,10 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64 def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. 
Please use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 6c02e06a6523..309109a42779 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -14,6 +14,7 @@ import contextlib import inspect +import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -34,6 +35,11 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index f8a55d026248..3cf71f6551a0 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect +import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -72,10 +73,10 @@ def preprocess(image): warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 953df11aa4f7..2c5c07f19e5b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -13,6 +13,7 @@ # limitations under the License. import inspect +import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -40,6 +41,11 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index b5a352c785ee..5494cd267478 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -13,6 +13,7 @@ # limitations under the License. 
import inspect +import warnings from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Union @@ -171,6 +172,11 @@ class Pix2PixInversionPipelineOutput(BaseOutput): # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): + warnings.warn( + "The function preprocess is deprecated and will be removed. Please" + " use VAEImageProcessor.preprocess instead.", + FutureWarning, + ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): From cd83878a7d02ac6799adc88520281d13b878744e Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 04:57:50 +0000 Subject: [PATCH 35/45] style again --- src/diffusers/image_processor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 52f84a98bbd8..7df632722e41 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -32,7 +32,8 @@ class VaeImageProcessor(ConfigMixin): do_resize (`bool`, *optional*, defaults to `True`): Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. vae_scale_factor (`int`, *optional*, defaults to `8`): - VAE scale factor. If `do_resize` is True, the image will be automatically resized to multiples of this factor. + VAE scale factor. If `do_resize` is True, the image will be automatically resized to multiples of this + factor. resample (`str`, *optional*, defaults to `lanczos`): Resampling filter to use when resizing the image. do_normalize (`bool`, *optional*, defaults to `True`): From 3dbb862627ded0b50f18f1b01a9c9902841d4dc2 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 19:39:38 +0000 Subject: [PATCH 36/45] update error message for using resize with torch tensor or numpy array --- src/diffusers/image_processor.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 7df632722e41..bb9f53f8ad4c 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -122,19 +122,26 @@ def preprocess( if self.do_resize: image = [self.resize(i) for i in image] image = [np.array(i).astype(np.float32) / 255.0 for i in image] + image = np.stack(image, axis=0) # to np + image = self.numpy_to_pt(image) # to pt - if isinstance(image[0], np.ndarray): + elif isinstance(image[0], np.ndarray): image = np.concatenate(image, axis=0) if image[0].ndim == 4 else np.stack(image, axis=0) image = self.numpy_to_pt(image) + _, _, height, width = image.shape + if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): + raise ValueError( + f"Currently we only support resizing for PIL image - please resize your numpy array to be divisible by {self.vae_scale_factor}" + f"currently the sizes are {height} and {width}. You can also pass a PIL image instead to use resize option in VAEImageProcessor" + ) + elif isinstance(image[0], torch.Tensor): image = torch.cat(image, axis=0) if image[0].ndim == 4 else torch.stack(image, axis=0) - - _, _, height, width = image.shape - - if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): - raise ValueError( - f"the height and width of image have to be divisible by {self.vae_scale_factor} but are {height} and {width}." 
- ) + if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): + raise ValueError( + f"Currently we only support resizing for PIL images - please resize your numpy array to be divisible by {self.vae_scale_factor}; " + f"currently the sizes are {height} and {width}. You can also pass a PIL image instead to use resize option in VAEImageProcessor" + ) # expected range [0,1], normalize to [-1,1] do_normalize = self.do_normalize From ef8582ffcdff7a4a009d8fe8770e7d0587d4df2b Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 19:45:10 +0000 Subject: [PATCH 37/45] fix --- src/diffusers/image_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index bb9f53f8ad4c..e64be787d386 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -137,6 +137,7 @@ def preprocess( elif isinstance(image[0], torch.Tensor): image = torch.cat(image, axis=0) if image[0].ndim == 4 else torch.stack(image, axis=0) + _, _, height, width = image.shape if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): raise ValueError( f"Currently we only support resizing for PIL images - please resize your numpy array to be divisible by {self.vae_scale_factor}; " f"currently the sizes are {height} and {width}. You can also pass a PIL image instead to use resize option in VAEImageProcessor" ) From 0cec7375854c1ef7ead1b1fbf63972ea6c15f6ee Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 19:49:30 +0000 Subject: [PATCH 38/45] remove deprecation warning for preprocess function + fix copies --- src/diffusers/image_processor.py | 6 +++--- .../alt_diffusion/pipeline_alt_diffusion_img2img.py | 6 ------ src/diffusers/pipelines/repaint/pipeline_repaint.py | 6 ------ .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 6 ------ .../pipeline_onnx_stable_diffusion_img2img.py | 5 ----- .../stable_diffusion/pipeline_stable_diffusion_depth2img.py | 6 ------ .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 6 ------ .../pipeline_stable_diffusion_instruct_pix2pix.py | 6 ------ .../pipeline_stable_diffusion_pix2pix_zero.py | 6 ------ 9 files changed, 3 insertions(+), 50 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index e64be787d386..74299cf2628d 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -133,16 +133,16 @@ def preprocess( raise ValueError( f"Currently we only support resizing for PIL images - please resize your numpy array to be divisible by {self.vae_scale_factor}; " f"currently the sizes are {height} and {width}. You can also pass a PIL image instead to use resize option in VAEImageProcessor" - ) + ) elif isinstance(image[0], torch.Tensor): image = torch.cat(image, axis=0) if image[0].ndim == 4 else torch.stack(image, axis=0) _, _, height, width = image.shape if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): raise ValueError( - f"Currently we only support resizing for PIL images - please resize your numpy array to be divisible by {self.vae_scale_factor}; " + f"Currently we only support resizing for PIL images - please resize your numpy array to be divisible by {self.vae_scale_factor}; " f"currently the sizes are {height} and {width}. 
You can also pass a PIL image instead to use resize option in VAEImageProcessor" - ) + ) # expected range [0,1], normalize to [-1,1] do_normalize = self.do_normalize diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 656223c04616..c9d64b934ab0 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect -import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -68,11 +67,6 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py index 5f4a65f43f17..fabcd2610f43 100644 --- a/src/diffusers/pipelines/repaint/pipeline_repaint.py +++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py @@ -13,7 +13,6 @@ # limitations under the License. -import warnings from typing import List, Optional, Tuple, Union import numpy as np @@ -31,11 +30,6 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def _preprocess_image(image: Union[List, PIL.Image.Image, torch.Tensor]): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 43751de89d98..e977071b9c6c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect -import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -38,11 +37,6 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 3391e61e33d8..9123e5f3296d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -13,7 +13,6 @@ # limitations under the License. 
import inspect -import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -34,10 +33,6 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64 def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index e270f1fe4ceb..9087064ae0b8 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -14,7 +14,6 @@ import contextlib import inspect -import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -35,11 +34,6 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 3cf71f6551a0..684843517af6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect -import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -72,11 +71,6 @@ def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 2c5c07f19e5b..953df11aa4f7 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect -import warnings from typing import Callable, List, Optional, Union import numpy as np @@ -41,11 +40,6 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. 
Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index 5494cd267478..b5a352c785ee 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect -import warnings from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Union @@ -172,11 +171,6 @@ class Pix2PixInversionPipelineOutput(BaseOutput): # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): - warnings.warn( - "The function preprocess is deprecated and will be removed. Please" - " use VAEImageProcessor.preprocess instead.", - FutureWarning, - ) if isinstance(image, torch.Tensor): return image elif isinstance(image, PIL.Image.Image): From be5fcdc28729e522ce066a442556f2bfdf4d91be Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 19:55:42 +0000 Subject: [PATCH 39/45] remove comment --- .../pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py | 2 -- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index c9d64b934ab0..05138c86f246 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -430,8 +430,6 @@ def decode_latents(self, latents): latents = 1 / self.vae.config.scaling_factor * latents image = self.vae.decode(latents).sample image = (image / 2 + 0.5).clamp(0, 1) - # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 - # image = image.cpu().permute(0, 2, 3, 1).float().numpy() return image def prepare_extra_step_kwargs(self, generator, eta): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 684843517af6..8b3a7944def1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -437,8 +437,6 @@ def decode_latents(self, latents): latents = 1 / self.vae.config.scaling_factor * latents image = self.vae.decode(latents).sample image = (image / 2 + 0.5).clamp(0, 1) - # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 - # image = image.cpu().permute(0, 2, 3, 1).float().numpy() return image # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs From f3a2676b53a5c21491b0b146408515b39b471cb7 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Tue, 14 Mar 2023 10:17:06 -1000 Subject: [PATCH 40/45] Apply suggestions from code review Co-authored-by: Pedro Cuenca --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 
74299cf2628d..0fbabe18eccf 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -108,7 +108,7 @@ def preprocess( image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray], ) -> torch.Tensor: """ - Preprocess the image input, accpet formats in PIL images, numpy arrays or pytorch tensors" + Preprocess the image input, accepted formats are PIL images, numpy arrays or pytorch tensors """ supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) if isinstance(image, supported_formats): From 419cabba213a7312ac235e7f8eb0da66c19076aa Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 14 Mar 2023 20:20:22 +0000 Subject: [PATCH 41/45] update error message --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 0fbabe18eccf..de6543800b2d 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -140,7 +140,7 @@ def preprocess( _, _, height, width = image.shape if self.do_resize and (height % self.vae_scale_factor != 0 or width % self.vae_scale_factor != 0): raise ValueError( - f"Currently we only support resizing for PIL images - please resize your numpy array to be divisible by {self.vae_scale_factor}; " + f"Currently we only support resizing for PIL images - please resize your pytorch tensor to be divisible by {self.vae_scale_factor}; " f"currently the sizes are {height} and {width}. You can also pass a PIL image instead to use resize option in VAEImageProcessor" ) From c844d2cfb26da3fcb3163aaba4975e2e3160cf88 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 15 Mar 2023 17:36:29 +0100 Subject: [PATCH 42/45] Update src/diffusers/__init__.py --- src/diffusers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 4315669cc459..f480b4100907 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -32,7 +32,6 @@ except OptionalDependencyNotAvailable: from .utils.dummy_pt_objects import * # noqa F403 else: - from .image_processor import VaeImageProcessor from .models import ( AutoencoderKL, ControlNetModel, From bf513f117dc78b8654e98bb2124e001b6e94e135 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 15 Mar 2023 17:38:55 +0100 Subject: [PATCH 43/45] Apply suggestions from code review --- tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py | 2 +- .../pipelines/stable_diffusion/test_stable_diffusion_img2img.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py index 10a3ef777b0e..939632943405 100644 --- a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py +++ b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py @@ -26,8 +26,8 @@ AutoencoderKL, PNDMScheduler, UNet2DConditionModel, - VaeImageProcessor, ) +from diffusers.image_processor import VaeImageProcessor from diffusers.pipelines.alt_diffusion.modeling_roberta_series import ( RobertaSeriesConfig, RobertaSeriesModelWithTransformation, ) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 5ab341b8a892..e27f83fc04fe 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -29,8 +29,8 @@ PNDMScheduler, 
StableDiffusionImg2ImgPipeline, UNet2DConditionModel, - VaeImageProcessor, ) +from diffusers.image_processor import VaeImageProcessor from diffusers.utils import floats_tensor, load_image, load_numpy, nightly, slow, torch_device from diffusers.utils.testing_utils import require_torch_gpu, skip_mps From 3054135ec505fc4315f06c7547cf3acd91e73ee4 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Wed, 15 Mar 2023 16:53:18 +0000 Subject: [PATCH 44/45] fix import --- tests/test_image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py index ce2483590668..4f0e2c5aecfd 100644 --- a/tests/test_image_processor.py +++ b/tests/test_image_processor.py @@ -19,7 +19,7 @@ import PIL import torch -from diffusers import VaeImageProcessor +from diffusers.image_processor import VaeImageProcessor class ImageProcessorTest(unittest.TestCase): From 89921a93ee8480995c88a6de86d7e53a1fa983b8 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Wed, 15 Mar 2023 16:53:48 +0000 Subject: [PATCH 45/45] fix copies --- src/diffusers/utils/dummy_pt_objects.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 1fcfb91d72e0..c731a1f1ddf3 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -2,21 +2,6 @@ from ..utils import DummyObject, requires_backends -class VaeImageProcessor(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch"]) - - class AutoencoderKL(metaclass=DummyObject): _backends = ["torch"]
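
A quick sanity-check of the behavior this series converges on, as a minimal usage sketch. It assumes the PATCH 36-44 state of `VaeImageProcessor` (defaults `do_resize=True`, `vae_scale_factor=8`, `do_normalize=True`, import path `diffusers.image_processor` per PATCH 42-44); the merged API may differ:

import numpy as np
import torch
from PIL import Image

from diffusers.image_processor import VaeImageProcessor

# Defaults per the class docstring: do_resize=True, vae_scale_factor=8, do_normalize=True.
processor = VaeImageProcessor()

# PIL input: height and width are rounded down to multiples of vae_scale_factor,
# pixel values are scaled to [0, 1], then normalized to [-1, 1].
pil_image = Image.fromarray(np.zeros((37, 53, 3), dtype=np.uint8))
tensor = processor.preprocess(pil_image)
print(tensor.shape)  # torch.Size([1, 3, 32, 48]): 37 -> 32, 53 -> 48

# numpy and torch inputs are never resized; off-multiple sizes raise the
# ValueError introduced in PATCH 36 and reworded in PATCH 41.
try:
    processor.preprocess(torch.rand(1, 3, 37, 53))
except ValueError as err:
    print(err)

Restricting resize to PIL inputs is deliberate: arrays and tensors are assumed to be pipeline-ready, and silently interpolating them could mask caller bugs, hence the explicit error.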