From 0d3e9052f796cfb88be44502949a1be972b564a5 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 5 Apr 2024 10:08:08 +0530
Subject: [PATCH 1/6] give it a shot.

---
 .../stable_diffusion/test_stable_diffusion.py        | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index bb3869947f12..31c44d8a0811 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -124,6 +124,8 @@ class StableDiffusionPipelineFastTests(
     callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS
 
     def get_dummy_components(self, time_cond_proj_dim=None):
+        cross_attention_dim = 8
+
         torch.manual_seed(0)
         unet = UNet2DConditionModel(
             block_out_channels=(4, 8),
@@ -134,7 +136,7 @@ def get_dummy_components(self, time_cond_proj_dim=None):
             out_channels=4,
             down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
             up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
-            cross_attention_dim=32,
+            cross_attention_dim=cross_attention_dim,
             norm_num_groups=2,
         )
         scheduler = DDIMScheduler(
@@ -158,11 +160,11 @@ def get_dummy_components(self, time_cond_proj_dim=None):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=cross_attention_dim,
+            intermediate_size=16,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=2,
+            num_hidden_layers=2,
             pad_token_id=1,
             vocab_size=1000,
         )

From d005fcbcc4ce4cffed383b59c00292995512394e Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 5 Apr 2024 10:15:22 +0530
Subject: [PATCH 2/6] print.

---
 tests/pipelines/stable_diffusion/test_stable_diffusion.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 31c44d8a0811..7654c2e29992 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -210,6 +210,7 @@ def test_stable_diffusion_ddim(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
+        print(", ".join([str(round(x, 4)) for x in image_slice.flatten().tolist()]))
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])

From 8762d6ed7d894a231bce76989ec580331841882a Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 5 Apr 2024 10:16:21 +0530
Subject: [PATCH 3/6] correct assertion.

---
 tests/pipelines/stable_diffusion/test_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 7654c2e29992..a0b863f3ebe9 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -213,7 +213,7 @@ def test_stable_diffusion_ddim(self):
         print(", ".join([str(round(x, 4)) for x in image_slice.flatten().tolist()]))
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])
+        expected_slice = np.array([0.1763, 0.4776, 0.4986, 0.2566, 0.3802, 0.4596, 0.5363, 0.3277, 0.3949])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 

From eba1ea2f0ffd788976ce7737010a36829594ea7c Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 5 Apr 2024 10:20:25 +0530
Subject: [PATCH 4/6] gather results from the rest of the tests.

---
 .../stable_diffusion/test_stable_diffusion.py | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index a0b863f3ebe9..6ecfa0e8614b 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -57,6 +57,7 @@
     skip_mps,
     slow,
     torch_device,
+    print_tensor_test
 )
 
 from ..pipeline_params import (
@@ -210,7 +211,6 @@ def test_stable_diffusion_ddim(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
-        print(", ".join([str(round(x, 4)) for x in image_slice.flatten().tolist()]))
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.1763, 0.4776, 0.4986, 0.2566, 0.3802, 0.4596, 0.5363, 0.3277, 0.3949])
@@ -231,6 +231,7 @@ def test_stable_diffusion_lcm(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3454, 0.5349, 0.5185, 0.2808, 0.4509, 0.4612, 0.4655, 0.3601, 0.4315])
@@ -253,6 +254,7 @@ def test_stable_diffusion_lcm_custom_timesteps(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3454, 0.5349, 0.5185, 0.2808, 0.4509, 0.4612, 0.4655, 0.3601, 0.4315])
@@ -374,11 +376,11 @@ def test_stable_diffusion_prompt_embeds_with_plain_negative_prompt_list(self):
 
         assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4
 
-    def test_ip_adapter_single(self):
-        expected_pipe_slice = None
-        if torch_device == "cpu":
-            expected_pipe_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])
-        return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
+    # def test_ip_adapter_single(self):
+    #     expected_pipe_slice = None
+    #     if torch_device == "cpu":
+    #         expected_pipe_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])
+    #     return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
 
     def test_stable_diffusion_ddim_factor_8(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -393,6 +395,7 @@ def test_stable_diffusion_ddim_factor_8(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 136, 136, 3)
         expected_slice = np.array([0.4346, 0.5621, 0.5016, 0.3926, 0.4533, 0.4134, 0.5625, 0.5632, 0.5265])
@@ -411,6 +414,7 @@ def test_stable_diffusion_pndm(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3411, 0.5032, 0.4704, 0.3135, 0.4323, 0.4740, 0.5150, 0.3498, 0.4022])
@@ -451,6 +455,7 @@ def test_stable_diffusion_k_lms(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
@@ -470,6 +475,7 @@ def test_stable_diffusion_k_euler_ancestral(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3151, 0.5243, 0.4794, 0.3217, 0.4468, 0.4728, 0.5152, 0.3598, 0.3954])
@@ -489,6 +495,7 @@ def test_stable_diffusion_k_euler(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
@@ -561,6 +568,7 @@ def test_stable_diffusion_negative_prompt(self):
 
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
+        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.3458, 0.5120, 0.4800, 0.3116, 0.4348, 0.4802, 0.5237, 0.3467, 0.3991])

From a2a857c07f343e31e0069ded76a1e77e1cb1aa74 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 5 Apr 2024 10:26:44 +0530
Subject: [PATCH 5/6] change the assertion values where needed.

---
 .../stable_diffusion/test_stable_diffusion.py | 22 +++++++------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 6ecfa0e8614b..369fa30a74f8 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -234,7 +234,7 @@ def test_stable_diffusion_lcm(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3454, 0.5349, 0.5185, 0.2808, 0.4509, 0.4612, 0.4655, 0.3601, 0.4315])
+        expected_slice = np.array([0.2368, 0.4900, 0.5019, 0.2723, 0.4473, 0.4578, 0.4551, 0.3532, 0.4133])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -257,7 +257,7 @@ def test_stable_diffusion_lcm_custom_timesteps(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3454, 0.5349, 0.5185, 0.2808, 0.4509, 0.4612, 0.4655, 0.3601, 0.4315])
+        expected_slice = np.array([0.2368, 0.4900, 0.5019, 0.2723, 0.4473, 0.4578, 0.4551, 0.3532, 0.4133])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -376,12 +376,6 @@ def test_stable_diffusion_prompt_embeds_with_plain_negative_prompt_list(self):
 
         assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4
 
-    # def test_ip_adapter_single(self):
-    #     expected_pipe_slice = None
-    #     if torch_device == "cpu":
-    #         expected_pipe_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])
-    #     return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
-
     def test_stable_diffusion_ddim_factor_8(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
 
@@ -398,7 +392,7 @@ def test_stable_diffusion_ddim_factor_8(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 136, 136, 3)
-        expected_slice = np.array([0.4346, 0.5621, 0.5016, 0.3926, 0.4533, 0.4134, 0.5625, 0.5632, 0.5265])
+        expected_slice = np.array([0.4720, 0.5426, 0.5160, 0.3961, 0.4696, 0.4296, 0.5738, 0.5888, 0.5481])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -417,7 +411,7 @@ def test_stable_diffusion_pndm(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3411, 0.5032, 0.4704, 0.3135, 0.4323, 0.4740, 0.5150, 0.3498, 0.4022])
+        expected_slice = np.array([0.1941, 0.4748, 0.4880, 0.2222, 0.4221, 0.4545, 0.5604, 0.3488, 0.3902])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -458,7 +452,7 @@ def test_stable_diffusion_k_lms(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
+        expected_slice = np.array([0.2681, 0.4785, 0.4857, 0.2426, 0.4473, 0.4481, 0.5610, 0.3676, 0.3855])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -478,7 +472,7 @@ def test_stable_diffusion_k_euler_ancestral(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3151, 0.5243, 0.4794, 0.3217, 0.4468, 0.4728, 0.5152, 0.3598, 0.3954])
+        expected_slice = np.array([0.2682, 0.4782, 0.4855, 0.2424, 0.4472, 0.4479, 0.5612, 0.3676, 0.3854])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -498,7 +492,7 @@ def test_stable_diffusion_k_euler(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
+        expected_slice = np.array([0.2681, 0.4785, 0.4857, 0.2426, 0.4473, 0.4481, 0.5610, 0.3676, 0.3855])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
@@ -571,7 +565,7 @@ def test_stable_diffusion_negative_prompt(self):
         print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
-        expected_slice = np.array([0.3458, 0.5120, 0.4800, 0.3116, 0.4348, 0.4802, 0.5237, 0.3467, 0.3991])
+        expected_slice = np.array([0.1907, 0.4709, 0.4858, 0.2224, 0.4223, 0.4539, 0.5606, 0.3489, 0.3900])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 

From dce80428907c23aaa9aa1f3dccbc00796fff0010 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 5 Apr 2024 10:28:37 +0530
Subject: [PATCH 6/6] remove print statements.

---
 .../pipelines/stable_diffusion/test_stable_diffusion.py  | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 369fa30a74f8..bba43d49b894 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -57,7 +57,6 @@
     skip_mps,
     slow,
     torch_device,
-    print_tensor_test
 )
 
 from ..pipeline_params import (
@@ -231,7 +230,6 @@ def test_stable_diffusion_lcm(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.2368, 0.4900, 0.5019, 0.2723, 0.4473, 0.4578, 0.4551, 0.3532, 0.4133])
@@ -254,7 +252,6 @@ def test_stable_diffusion_lcm_custom_timesteps(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.2368, 0.4900, 0.5019, 0.2723, 0.4473, 0.4578, 0.4551, 0.3532, 0.4133])
@@ -389,7 +386,6 @@ def test_stable_diffusion_ddim_factor_8(self):
         image = output.images
 
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 136, 136, 3)
         expected_slice = np.array([0.4720, 0.5426, 0.5160, 0.3961, 0.4696, 0.4296, 0.5738, 0.5888, 0.5481])
@@ -408,7 +404,6 @@ def test_stable_diffusion_pndm(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.1941, 0.4748, 0.4880, 0.2222, 0.4221, 0.4545, 0.5604, 0.3488, 0.3902])
@@ -449,7 +444,6 @@ def test_stable_diffusion_k_lms(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.2681, 0.4785, 0.4857, 0.2426, 0.4473, 0.4481, 0.5610, 0.3676, 0.3855])
@@ -469,7 +463,6 @@ def test_stable_diffusion_k_euler_ancestral(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.2682, 0.4782, 0.4855, 0.2424, 0.4472, 0.4479, 0.5612, 0.3676, 0.3854])
@@ -489,7 +482,6 @@ def test_stable_diffusion_k_euler(self):
         output = sd_pipe(**inputs)
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.2681, 0.4785, 0.4857, 0.2426, 0.4473, 0.4481, 0.5610, 0.3676, 0.3855])
@@ -562,7 +554,6 @@ def test_stable_diffusion_negative_prompt(self):
 
         image = output.images
         image_slice = image[0, -3:, -3:, -1]
-        print_tensor_test(tensor=image_slice, max_torch_print=True)
 
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.1907, 0.4709, 0.4858, 0.2224, 0.4223, 0.4539, 0.5606, 0.3489, 0.3900])