Commit 8b451eb

Fix config prints and save, load of pipelines (#2849)

* [Config] Fix config prints and save, load
* Only use potential nn.Modules for dtype and device
* Correct vae image processor
* make sure in_channels is not accessed directly
* make sure in channels is only accessed via config
* Make sure schedulers only access config attributes
* Make sure to access config in SAG
* Fix vae processor and make style
* add tests
* uP
* make style
* Fix more naming issues
* Final fix with vae config
* change more
1 parent 8369196 commit 8b451eb
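
The pattern behind every hunk in this commit: hyperparameters such as `in_channels`, `sample_size`, and `num_train_timesteps` are read from a component's frozen `config` instead of as direct attributes on the module. A minimal sketch of the before and after, assuming a recent diffusers install; the default-config components below exist purely for illustration:

    from diffusers import DDPMScheduler, UNet2DModel

    # Default-config components, for illustration only.
    unet = UNet2DModel(sample_size=64)
    scheduler = DDPMScheduler()

    # Old pattern (removed throughout this commit):
    #   unet.in_channels, unet.sample_size, scheduler.num_train_timesteps

    # New pattern (added throughout this commit): go through .config.
    shape = (1, unet.config.in_channels, unet.config.sample_size, unet.config.sample_size)
    num_train_timesteps = scheduler.config.num_train_timesteps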

66 files changed (+221, −105 lines)


docs/source/en/tutorials/basic_training.mdx

Lines changed: 1 addition & 1 deletion

@@ -344,7 +344,7 @@ Now you can wrap all these components together in a training loop with 🤗 Acce
 ... # Sample a random timestep for each image
 ... timesteps = torch.randint(
-...     0, noise_scheduler.num_train_timesteps, (bs,), device=clean_images.device
+...     0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device
 ... ).long()

 ... # Add noise to the clean images according to the noise magnitude at each timestep

docs/source/en/using-diffusers/contribute_pipeline.mdx

Lines changed: 2 additions & 2 deletions

@@ -62,7 +62,7 @@ class UnetSchedulerOneForwardPipeline(DiffusionPipeline):
     def __call__(self):
         image = torch.randn(
-            (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size),
+            (1, self.unet.config.in_channels, self.unet.config.sample_size, self.unet.config.sample_size),
         )
         timestep = 1

@@ -108,7 +108,7 @@ class UnetSchedulerOneForwardPipeline(DiffusionPipeline):
     def __call__(self):
         image = torch.randn(
-            (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size),
+            (1, self.unet.config.in_channels, self.unet.config.sample_size, self.unet.config.sample_size),
         )
         timestep = 1

docs/source/en/using-diffusers/custom_pipeline_overview.mdx

Lines changed: 3 additions & 1 deletion

@@ -89,7 +89,9 @@ class MyPipeline(DiffusionPipeline):
     @torch.no_grad()
     def __call__(self, batch_size: int = 1, num_inference_steps: int = 50):
         # Sample gaussian noise to begin loop
-        image = torch.randn((batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size))
+        image = torch.randn(
+            (batch_size, self.unet.config.in_channels, self.unet.config.sample_size, self.unet.config.sample_size)
+        )

         image = image.to(self.device)

examples/community/bit_diffusion.py

Lines changed: 1 addition & 1 deletion

@@ -238,7 +238,7 @@ def __call__(
         **kwargs,
     ) -> Union[Tuple, ImagePipelineOutput]:
         latents = torch.randn(
-            (batch_size, self.unet.in_channels, height, width),
+            (batch_size, self.unet.config.in_channels, height, width),
             generator=generator,
         )
         latents = decimal_to_bits(latents) * self.bit_scale

examples/community/clip_guided_stable_diffusion.py

Lines changed: 1 addition & 1 deletion

@@ -254,7 +254,7 @@ def __call__(
         # Unlike in other pipelines, latents need to be generated in the target device
         # for 1-to-1 results reproducibility with the CompVis implementation.
         # However this currently doesn't work in `mps`.
-        latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8)
+        latents_shape = (batch_size * num_images_per_prompt, self.unet.config.in_channels, height // 8, width // 8)
         latents_dtype = text_embeddings.dtype
         if latents is None:
             if self.device.type == "mps":
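
The `mps` comment preserved in this hunk points at a device-specific fallback used by several of these community pipelines: latents are sampled on CPU and then moved over. A minimal sketch of that workaround, with a hypothetical latent shape standing in for what the pipeline computes:

    import torch

    # Hypothetical stand-in for the pipeline's latents_shape:
    # (batch, unet.config.in_channels, height // 8, width // 8)
    latents_shape = (1, 4, 64, 64)
    device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

    if device.type == "mps":
        # randn on mps was historically unreliable, so sample on CPU and move over
        latents = torch.randn(latents_shape, device="cpu").to(device)
    else:
        latents = torch.randn(latents_shape, device=device)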

examples/community/clip_guided_stable_diffusion_img2img.py

Lines changed: 1 addition & 1 deletion

@@ -414,7 +414,7 @@ def __call__(
         # Unlike in other pipelines, latents need to be generated in the target device
         # for 1-to-1 results reproducibility with the CompVis implementation.
         # However this currently doesn't work in `mps`.
-        latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8)
+        latents_shape = (batch_size * num_images_per_prompt, self.unet.config.in_channels, height // 8, width // 8)
         latents_dtype = text_embeddings.dtype
         if latents is None:
             if self.device.type == "mps":

examples/community/composable_stable_diffusion.py

Lines changed: 1 addition & 1 deletion

@@ -513,7 +513,7 @@ def __call__(
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,

examples/community/imagic_stable_diffusion.py

Lines changed: 1 addition & 1 deletion

@@ -424,7 +424,7 @@ def __call__(
         # Unlike in other pipelines, latents need to be generated in the target device
         # for 1-to-1 results reproducibility with the CompVis implementation.
         # However this currently doesn't work in `mps`.
-        latents_shape = (1, self.unet.in_channels, height // 8, width // 8)
+        latents_shape = (1, self.unet.config.in_channels, height // 8, width // 8)
         latents_dtype = text_embeddings.dtype
         if self.device.type == "mps":
             # randn does not exist on mps

examples/community/interpolate_stable_diffusion.py

Lines changed: 2 additions & 2 deletions

@@ -320,7 +320,7 @@ def __call__(
         # Unlike in other pipelines, latents need to be generated in the target device
         # for 1-to-1 results reproducibility with the CompVis implementation.
         # However this currently doesn't work in `mps`.
-        latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8)
+        latents_shape = (batch_size * num_images_per_prompt, self.unet.config.in_channels, height // 8, width // 8)
         latents_dtype = text_embeddings.dtype
         if latents is None:
             if self.device.type == "mps":

@@ -416,7 +416,7 @@ def embed_text(self, text):
     def get_noise(self, seed, dtype=torch.float32, height=512, width=512):
         """Takes in random seed and returns corresponding noise vector"""
         return torch.randn(
-            (1, self.unet.in_channels, height // 8, width // 8),
+            (1, self.unet.config.in_channels, height // 8, width // 8),
             generator=torch.Generator(device=self.device).manual_seed(seed),
             device=self.device,
             dtype=dtype,

examples/community/lpw_stable_diffusion.py

Lines changed: 1 addition & 1 deletion

@@ -627,7 +627,7 @@ def prepare_latents(self, image, timestep, batch_size, height, width, dtype, dev
         if image is None:
             shape = (
                 batch_size,
-                self.unet.in_channels,
+                self.unet.config.in_channels,
                 height // self.vae_scale_factor,
                 width // self.vae_scale_factor,
             )
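
Unlike the hard-coded `height // 8` in the pipelines above, `lpw_stable_diffusion` divides by `self.vae_scale_factor`. A sketch of where that value typically comes from, on the assumption that it is derived from the VAE config the way mainline diffusers pipelines derive it:

    from diffusers import AutoencoderKL

    # Default-config VAE, for illustration only.
    vae = AutoencoderKL()

    # Assumption: each extra down block halves the resolution once, so the
    # spatial scale factor follows from the number of blocks in the config.
    vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)

    # Stable Diffusion's VAE has 4 blocks, giving the familiar factor of 8
    # that matches the height // 8, width // 8 seen in the diffs above.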
