Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/diffusers/pipelines/ddgan/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import TYPE_CHECKING

from ...utils import (
DIFFUSERS_SLOW_IMPORT,
_LazyModule,
)


_import_structure = {"pipeline_ddgan": ["DDGANPipeline"]}


if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
from .pipeline_ddgan import DDGANPipeline
else:
import sys

sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
)
315 changes: 315 additions & 0 deletions src/diffusers/pipelines/ddgan/pipeline_ddgan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import inspect
from typing import Any, Callable, Dict, List, Optional, Union

import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

from ...image_processor import VaeImageProcessor
from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers
from ...utils import (
USE_PEFT_BACKEND,
deprecate,
logging,
replace_example_docstring,
scale_lora_layers,
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


logger = logging.get_logger(__name__) # pylint: disable=invalid-name


# TODO: update
EXAMPLE_DOC_STRING = """
Examples:
```py
>>> from diffusers import DiffusionPipeline
>>> import torch

>>> pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7")
>>> # To save GPU memory, torch.float16 can be used, but it may compromise image quality.
>>> pipe.to(torch_device="cuda", torch_dtype=torch.float32)

>>> prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"

>>> # Can be set to 1~50 steps. LCM support fast inference even <= 4 steps. Recommend: 1~8 steps.
>>> num_inference_steps = 4
>>> images = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0).images
>>> images[0].save("image.png")
```
"""


# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
def retrieve_timesteps(
scheduler,
num_inference_steps: Optional[int] = None,
device: Optional[Union[str, torch.device]] = None,
timesteps: Optional[List[int]] = None,
**kwargs,
):
"""
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
scheduler (`SchedulerMixin`):
The scheduler to get timesteps from.
num_inference_steps (`int`):
The number of diffusion steps used when generating samples with a pre-trained model. If used,
`timesteps` must be `None`.
device (`str` or `torch.device`, *optional*):
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
timesteps (`List[int]`, *optional*):
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
must be `None`.

Returns:
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
second element is the number of inference steps.
"""
if timesteps is not None:
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
if not accepts_timesteps:
raise ValueError(
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
f" timestep schedules. Please check whether you are using the correct scheduler."
)
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
timesteps = scheduler.timesteps
num_inference_steps = len(timesteps)
else:
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
timesteps = scheduler.timesteps
return timesteps, num_inference_steps


class DDGANPipeline(DiffusionPipeline):
r"""
Pipeline for denoising diffusion GAN (DDGAN) model.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

Args:
unet ([`UNet2DModel`]):
A `UNet2DModel` to denoise the noisy images.
scheduler ([`SchedulerMixin`]):
A scheduler to be used in combination with `unet` to denoise the noisy images.
"""

model_cpu_offload_seq = "unet"

def __init__(
self,
unet: UNet2DConditionModel,
scheduler: KarrasDiffusionSchedulers,
):
super().__init__()

self.register_modules(
unet=unet,
scheduler=scheduler,
)

# Copied from diffusers.pipelines.consistency_models.pipeline_consistency_models.ConsistencyModelPipeline.prepare_latents
def prepare_latents(self, batch_size, num_channels, height, width, dtype, device, generator, latents=None):
shape = (batch_size, num_channels, height, width)
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)

if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
else:
latents = latents.to(device=device, dtype=dtype)

# scale the initial noise by the standard deviation required by the scheduler
latents = latents * self.scheduler.init_noise_sigma
return latents

# Copied from diffusers.pipelines.consistency_models.pipeline_consistency_models.ConsistencyModelPipeline.postprocess_image
def postprocess_image(self, sample: torch.FloatTensor, output_type: str = "pil"):
if output_type not in ["pt", "np", "pil"]:
raise ValueError(
f"output_type={output_type} is not supported. Make sure to choose one of ['pt', 'np', or 'pil']"
)

# Equivalent to diffusers.VaeImageProcessor.denormalize
sample = (sample / 2 + 0.5).clamp(0, 1)
if output_type == "pt":
return sample

# Equivalent to diffusers.VaeImageProcessor.pt_to_numpy
sample = sample.cpu().permute(0, 2, 3, 1).numpy()
if output_type == "np":
return sample

# Output_type must be 'pil'
sample = self.numpy_to_pil(sample)
return sample

# Copied from diffusers.pipelines.consistency_models.pipeline_consistency_models.ConsistencyModelPipeline.prepare_class_labels
def prepare_class_labels(self, batch_size, device, class_labels=None):
if self.unet.config.num_class_embeds is not None:
if isinstance(class_labels, list):
class_labels = torch.tensor(class_labels, dtype=torch.int)
elif isinstance(class_labels, int):
assert batch_size == 1, "Batch size must be 1 if classes is an int"
class_labels = torch.tensor([class_labels], dtype=torch.int)
elif class_labels is None:
# Randomly generate batch_size class labels
# TODO: should use generator here? int analogue of randn_tensor is not exposed in ...utils
class_labels = torch.randint(0, self.unet.config.num_class_embeds, size=(batch_size,))
class_labels = class_labels.to(device)
else:
class_labels = None

def check_inputs(self, num_inference_steps, timesteps, latents, batch_size, img_size, callback_steps):
if num_inference_steps is None and timesteps is None:
raise ValueError("Exactly one of `num_inference_steps` or `timesteps` must be supplied.")

if num_inference_steps is not None and timesteps is not None:
logger.warning(
f"Both `num_inference_steps`: {num_inference_steps} and `timesteps`: {timesteps} are supplied;"
" `timesteps` will be used over `num_inference_steps`."
)

if latents is not None:
expected_shape = (batch_size, 3, img_size, img_size)
if latents.shape != expected_shape:
raise ValueError(f"The shape of latents is {latents.shape} but is expected to be {expected_shape}.")

if (callback_steps is None) or (
callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
):
raise ValueError(
f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
f" {type(callback_steps)}."
)

@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
self,
batch_size: int = 1,
class_labels: Optional[Union[torch.Tensor, List[int], int]] = None,
num_inference_steps: int = 4,
timesteps: List[int] = None,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None,
output_type: Optional[str] = "pil",
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
):
r"""
Args:
batch_size (`int`, *optional*, defaults to 1):
The number of images to generate.
class_labels (`torch.Tensor` or `List[int]` or `int`, *optional*):
Optional class labels for conditioning class-conditional consistency models. Not used if the model is
not class-conditional.
num_inference_steps (`int`, *optional*, defaults to 1):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.
timesteps (`List[int]`, *optional*):
Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
timesteps are used. Must be in descending order.
generator (`torch.Generator`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic.
latents (`torch.FloatTensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`.
output_type (`str`, *optional*, defaults to `"pil"`):
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
return_dict (`bool`, *optional*, defaults to `True`):
Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at
every step.

Examples:

Returns:
[`~pipelines.ImagePipelineOutput`] or `tuple`:
If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
returned where the first element is a list with the generated images.
"""
# 0. Prepare call parameters
img_size = self.unet.config.sample_size
device = self._execution_device

# 1. Check inputs
self.check_inputs(num_inference_steps, timesteps, latents, batch_size, img_size, callback_steps)

# 2. Prepare image latents
# Sample image latents x_T ~ N(0, sigma^2 * I)
sample = self.prepare_latents(
batch_size=batch_size,
num_channels=self.unet.config.in_channels,
height=img_size,
width=img_size,
dtype=self.unet.dtype,
device=device,
generator=generator,
latents=latents,
)

# 3. Handle class_labels for class-conditional models
class_labels = self.prepare_class_labels(batch_size, device, class_labels=class_labels)

# 4. Prepare timesteps
timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)

# 5. Denoising loop
with self.progress_bar(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
scaled_sample = self.scheduler.scale_model_input(sample, t)
model_output = self.unet(scaled_sample, t, class_labels=class_labels, return_dict=False)[0]

sample = self.scheduler.step(model_output, t, sample, generator=generator)[0]

# call the callback, if provided
progress_bar.update()
if callback is not None and i % callback_steps == 0:
callback(i, t, sample)

# 6. Post-process image sample
image = self.postprocess_image(sample, output_type=output_type)

# Offload all models
self.maybe_free_model_hooks()

if not return_dict:
return (image,)

return ImagePipelineOutput(images=image)
Loading