labmlai · CatManJr · Nov 28, 2024 · Nov 28, 2024 · Nov 28, 2024
diff --git a/labml_nn/diffusion/stable_diffusion/scripts/image_to_image.py b/labml_nn/diffusion/stable_diffusion/scripts/image_to_image.py
@@ -12,6 +12,7 @@
 from pathlib import Path
 
 import torch
+from torchvision import transforms
 
 from labml import lab, monit
 from labml_nn.diffusion.stable_diffusion.sampler.ddim import DDIMSampler
@@ -69,6 +70,8 @@ def __call__(self, *,
         orig_image = load_img(orig_img).to(self.device)
         # Encode the image in the latent space and make `batch_size` copies of it
         orig = self.model.autoencoder_encode(orig_image).repeat(batch_size, 1, 1, 1)
+        # NOTE(review): the next line repeats the encode already done just above on the same `orig_image`; it looks redundant — confirm it is intended
+        orig = self.model.autoencoder_encode(orig_image).repeat(batch_size, 1, 1, 1)
 
         # Get the number of steps to diffuse the original
         assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'

diff --git a/labml_nn/diffusion/stable_diffusion/util.py b/labml_nn/diffusion/stable_diffusion/util.py
@@ -115,9 +115,9 @@ def load_img(path: str):
     image = Image.open(path).convert("RGB")
     # Get image size
     w, h = image.size
-    # Resize to a multiple of 32
-    w = w - w % 32
-    h = h - h % 32
+    # Round each dimension down to a multiple of 64, then resize to that size
+    w = w - w % 64
+    h = h - h % 64
     image = image.resize((w, h), resample=PIL.Image.LANCZOS)
     # Convert to numpy and map to `[-1, 1]` for `[0, 255]`
     image = np.array(image).astype(np.float32) * (2. / 255.0) - 1