Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions examples/controlnet/train_controlnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version, is_wandb_available
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.torch_utils import is_compiled_module


if is_wandb_available():
Expand Down Expand Up @@ -787,6 +788,12 @@ def main(args):
logger.info("Initializing controlnet weights from unet")
controlnet = ControlNetModel.from_unet(unet)

# Taken from [Sayak Paul's Diffusers PR #6511](https://github.com/huggingface/diffusers/pull/6511/files)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove it. Not a problem. But I appreciate the thoughtfulness.

def unwrap_model(model):
    """Return the module underneath any accelerator / compilation wrappers.

    The model is first handed to `accelerator.unwrap_model`. If
    `is_compiled_module` then reports the result as a compiled module, the
    module stored on its `_orig_mod` attribute is returned instead.
    """
    unwrapped = accelerator.unwrap_model(model)
    if is_compiled_module(unwrapped):
        # Compiled wrappers keep the original module on `_orig_mod`.
        return unwrapped._orig_mod
    return unwrapped

# `accelerate` 0.16.0 will have better support for customized saving
if version.parse(accelerate.__version__) >= version.parse("0.16.0"):
# create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format
Expand Down Expand Up @@ -846,9 +853,9 @@ def load_model_hook(models, input_dir):
" doing mixed precision training, copy of the weights should still be float32."
)

if accelerator.unwrap_model(controlnet).dtype != torch.float32:
if unwrap_model(controlnet).dtype != torch.float32:
raise ValueError(
f"Controlnet loaded as datatype {accelerator.unwrap_model(controlnet).dtype}. {low_precision_error_string}"
f"Controlnet loaded as datatype {unwrap_model(controlnet).dtype}. {low_precision_error_string}"
)

# Enable TF32 for faster training on Ampere GPUs,
Expand Down Expand Up @@ -1015,7 +1022,7 @@ def load_model_hook(models, input_dir):
noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

# Get the text embedding for conditioning
encoder_hidden_states = text_encoder(batch["input_ids"])[0]
encoder_hidden_states = text_encoder(batch["input_ids"], return_dict=False)[0]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: For ControlNet training on SD, it won't matter (same for SDXL) as we never train the text encoder during ControlNet training. But keeping it this way doesn't hurt things. So, I am okay with it.


controlnet_image = batch["conditioning_pixel_values"].to(dtype=weight_dtype)

Expand All @@ -1036,7 +1043,8 @@ def load_model_hook(models, input_dir):
sample.to(dtype=weight_dtype) for sample in down_block_res_samples
],
mid_block_additional_residual=mid_block_res_sample.to(dtype=weight_dtype),
).sample
return_dict=False,
)[0]

# Get the target for loss depending on the prediction type
if noise_scheduler.config.prediction_type == "epsilon":
Expand Down Expand Up @@ -1109,7 +1117,7 @@ def load_model_hook(models, input_dir):
# Create the pipeline using the trained modules and save it.
accelerator.wait_for_everyone()
if accelerator.is_main_process:
controlnet = accelerator.unwrap_model(controlnet)
controlnet = unwrap_model(controlnet)
controlnet.save_pretrained(args.output_dir)

if args.push_to_hub:
Expand Down