From c605497f1328d54a819fb79b5872b6de2aff9aec Mon Sep 17 00:00:00 2001 From: multimodalart Date: Thu, 27 Apr 2023 15:12:05 +0200 Subject: [PATCH] Update IF name to XL --- docs/source/en/api/pipelines/if.mdx | 32 +++++++++---------- .../pipelines/deepfloyd_if/pipeline_if.py | 2 +- .../deepfloyd_if/pipeline_if_img2img.py | 2 +- .../pipeline_if_img2img_superresolution.py | 2 +- .../deepfloyd_if/pipeline_if_inpainting.py | 2 +- .../pipeline_if_inpainting_superresolution.py | 2 +- .../pipeline_if_superresolution.py | 2 +- tests/pipelines/deepfloyd_if/test_if.py | 2 +- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/source/en/api/pipelines/if.mdx b/docs/source/en/api/pipelines/if.mdx index 921a68a29f76..d79c7035fb75 100644 --- a/docs/source/en/api/pipelines/if.mdx +++ b/docs/source/en/api/pipelines/if.mdx @@ -29,7 +29,7 @@ Our work underscores the potential of larger UNet architectures in the first sta Before you can use IF, you need to accept its usage conditions. To do so: 1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be logged in -2. Accept the license on the model card of [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models. +2. Accept the license on the model card of [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models. 3. Make sure to login locally. Install `huggingface_hub` ```sh pip install huggingface_hub --upgrade @@ -62,7 +62,7 @@ The following sections give more in-detail examples of how to use IF. Specifical **Available checkpoints** - *Stage-1* - - [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0) + - [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0) - [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0) - [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0) @@ -90,7 +90,7 @@ from diffusers.utils import pt_to_pil import torch # stage 1 -stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) stage_1.enable_model_cpu_offload() # stage 2 @@ -162,7 +162,7 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB") original_image = original_image.resize((768, 512)) # stage 1 -stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) stage_1.enable_model_cpu_offload() # stage 2 @@ -244,7 +244,7 @@ mask_image = Image.open(BytesIO(response.content)) mask_image = mask_image # stage 1 -stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) stage_1.enable_model_cpu_offload() # stage 2 @@ -305,7 +305,7 @@ In addition to being loaded with `from_pretrained`, Pipelines can also be loaded ```python from diffusers import IFPipeline, IFSuperResolutionPipeline -pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0") +pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0") pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0") @@ -326,7 +326,7 @@ pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components) The simplest optimization to run IF faster is to move all model components to the GPU. ```py -pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) pipe.to("cuda") ``` @@ -352,7 +352,7 @@ the input image which also determines how many steps to run in the denoising pro A smaller number will vary the image less but run faster. ```py -pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) pipe.to("cuda") image = pipe(image=image, prompt="", strength=0.3).images @@ -364,7 +364,7 @@ with IF and it might not give expected results. ```py import torch -pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) pipe.to("cuda") pipe.text_encoder = torch.compile(pipe.text_encoder) @@ -378,14 +378,14 @@ When optimizing for GPU memory, we can use the standard diffusers cpu offloading Either the model based CPU offloading, ```py -pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) pipe.enable_model_cpu_offload() ``` or the more aggressive layer based CPU offloading. ```py -pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) +pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) pipe.enable_sequential_cpu_offload() ``` @@ -395,13 +395,13 @@ Additionally, T5 can be loaded in 8bit precision from transformers import T5EncoderModel text_encoder = T5EncoderModel.from_pretrained( - "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit" + "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit" ) from diffusers import DiffusionPipeline pipe = DiffusionPipeline.from_pretrained( - "DeepFloyd/IF-I-IF-v1.0", + "DeepFloyd/IF-I-XL-v1.0", text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder unet=None, device_map="auto", @@ -422,13 +422,13 @@ from transformers import T5EncoderModel from diffusers.utils import pt_to_pil text_encoder = T5EncoderModel.from_pretrained( - "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit" + "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit" ) # text to image pipe = DiffusionPipeline.from_pretrained( - "DeepFloyd/IF-I-IF-v1.0", + "DeepFloyd/IF-I-XL-v1.0", text_encoder=text_encoder, # pass the previously instantiated 8bit text encoder unet=None, device_map="auto", @@ -444,7 +444,7 @@ gc.collect() torch.cuda.empty_cache() pipe = IFPipeline.from_pretrained( - "DeepFloyd/IF-I-IF-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto" + "DeepFloyd/IF-I-XL-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto" ) generator = torch.Generator().manual_seed(0) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py index a76e51a3ffe9..479ffa9e6635 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py @@ -41,7 +41,7 @@ >>> from diffusers.utils import pt_to_pil >>> import torch - >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) + >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) >>> pipe.enable_model_cpu_offload() >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"' diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py index a31748450d4b..fac4adeea463 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py @@ -70,7 +70,7 @@ def resize(images: PIL.Image.Image, img_size: int) -> PIL.Image.Image: >>> original_image = original_image.resize((768, 512)) >>> pipe = IFImg2ImgPipeline.from_pretrained( - ... "DeepFloyd/IF-I-IF-v1.0", + ... "DeepFloyd/IF-I-XL-v1.0", ... variant="fp16", ... torch_dtype=torch.float16, ... ) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py index 21e280654cf5..eed1bb43e5d8 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py @@ -73,7 +73,7 @@ def resize(images: PIL.Image.Image, img_size: int) -> PIL.Image.Image: >>> original_image = original_image.resize((768, 512)) >>> pipe = IFImg2ImgPipeline.from_pretrained( - ... "DeepFloyd/IF-I-IF-v1.0", + ... "DeepFloyd/IF-I-XL-v1.0", ... variant="fp16", ... torch_dtype=torch.float16, ... ) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py index 95eba1cc7d24..d3651f5169c1 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py @@ -76,7 +76,7 @@ def resize(images: PIL.Image.Image, img_size: int) -> PIL.Image.Image: >>> mask_image = mask_image >>> pipe = IFInpaintingPipeline.from_pretrained( - ... "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16 + ... "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16 ... ) >>> pipe.enable_model_cpu_offload() diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py index 4eb0bf300fa5..5ea6a47082ae 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py @@ -78,7 +78,7 @@ def resize(images: PIL.Image.Image, img_size: int) -> PIL.Image.Image: >>> mask_image = mask_image >>> pipe = IFInpaintingPipeline.from_pretrained( - ... "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16 + ... "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16 ... ) >>> pipe.enable_model_cpu_offload() diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py index bb1d4ee4ba66..a62a51b0972f 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py @@ -45,7 +45,7 @@ >>> from diffusers.utils import pt_to_pil >>> import torch - >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) + >>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) >>> pipe.enable_model_cpu_offload() >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"' diff --git a/tests/pipelines/deepfloyd_if/test_if.py b/tests/pipelines/deepfloyd_if/test_if.py index e2204cb601a6..bf01c2350d22 100644 --- a/tests/pipelines/deepfloyd_if/test_if.py +++ b/tests/pipelines/deepfloyd_if/test_if.py @@ -94,7 +94,7 @@ def tearDown(self): def test_all(self): # if - pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16) + pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) pipe_2 = IFSuperResolutionPipeline.from_pretrained( "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None