12 changes: 5 additions & 7 deletions tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -34,7 +34,7 @@
     UNet2DModel,
     VQModel,
 )
-from diffusers.utils import floats_tensor, load_image, slow, torch_device
+from diffusers.utils import floats_tensor, load_numpy, slow, torch_device
 from diffusers.utils.testing_utils import require_torch_gpu
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

@@ -753,12 +753,10 @@ def test_stable_diffusion_text2img_pipeline_fp16(self):
         # however, they should be extremely close.
         assert diff.mean() < 2e-2

-    def test_stable_diffusion_text2img_pipeline(self):
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
-            "/text2img/astronaut_riding_a_horse.png"
+    def test_stable_diffusion_text2img_pipeline_default(self):
+        expected_image = load_numpy(
+            "https://huggingface.co/datasets/lewington/expected-images/resolve/main/astronaut_riding_a_horse.npy"
         )
-        expected_image = np.array(expected_image, dtype=np.float32) / 255.0

         model_id = "CompVis/stable-diffusion-v1-4"
         pipe = StableDiffusionPipeline.from_pretrained(model_id, safety_checker=None)
@@ -773,7 +771,7 @@ def test_stable_diffusion_text2img_pipeline(self):
         image = output.images[0]

         assert image.shape == (512, 512, 3)
-        assert np.abs(expected_image - image).max() < 1e-2
+        assert np.abs(expected_image - image).max() < 1e-3

     def test_stable_diffusion_text2img_intermediate_state(self):
         number_of_steps = 0
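Why the reference moved from a PNG to a `.npy`: decoding an 8-bit PNG quantizes every pixel onto a 1/255 grid, so a PNG-based reference carries up to roughly 2e-3 of quantization error before the pipeline output is even compared, which would make the tightened 1e-3 max-abs tolerance unverifiable. `load_numpy` fetches the raw float32 array instead, which round-trips losslessly. A minimal sketch of the difference (the random array and the file name are illustrative, not part of the test suite):

```python
import numpy as np

# Stand-in for a float32 image in [0, 1] as returned by the pipeline.
image = np.random.rand(512, 512, 3).astype(np.float32)

# PNG round trip: quantize to uint8 and back. The reconstruction error
# can reach ~0.5 / 255 ≈ 2e-3, already above the new 1e-3 tolerance.
png_round_trip = (image * 255.0).round().astype(np.uint8).astype(np.float32) / 255.0
print(np.abs(image - png_round_trip).max())

# .npy round trip: float32 values are stored bit-for-bit, so the error is exactly 0.
np.save("expected.npy", image)
assert np.abs(np.load("expected.npy") - image).max() == 0.0
```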
38 changes: 17 additions & 21 deletions tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
@@ -29,7 +29,7 @@
     UNet2DModel,
     VQModel,
 )
-from diffusers.utils import floats_tensor, load_image, slow, torch_device
+from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device
 from diffusers.utils.testing_utils import require_torch_gpu
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

@@ -156,7 +156,7 @@ def to(self, device):

         return extract

-    def test_stable_diffusion_img2img(self):
+    def test_stable_diffusion_img2img_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(skip_prk_steps=True)
@@ -208,8 +208,8 @@ def test_stable_diffusion_img2img(self):

         assert image.shape == (1, 32, 32, 3)
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
+        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-3

     def test_stable_diffusion_img2img_negative_prompt(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -251,7 +251,7 @@ def test_stable_diffusion_img2img_negative_prompt(self):

         assert image.shape == (1, 32, 32, 3)
         expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

     def test_stable_diffusion_img2img_multiple_init_images(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -293,7 +293,7 @@ def test_stable_diffusion_img2img_multiple_init_images(self):

         assert image.shape == (2, 32, 32, 3)
         expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

     def test_stable_diffusion_img2img_k_lms(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -348,8 +348,8 @@ def test_stable_diffusion_img2img_k_lms(self):

         assert image.shape == (1, 32, 32, 3)
         expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
+        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-3

     def test_stable_diffusion_img2img_num_images_per_prompt(self):
         device = "cpu"
@@ -472,17 +472,15 @@ def tearDown(self):
         gc.collect()
         torch.cuda.empty_cache()

-    def test_stable_diffusion_img2img_pipeline(self):
+    def test_stable_diffusion_img2img_pipeline_default(self):
         init_image = load_image(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
             "/img2img/sketch-mountains-input.jpg"
         )
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
-            "/img2img/fantasy_landscape.png"
-        )
         init_image = init_image.resize((768, 512))
-        expected_image = np.array(expected_image, dtype=np.float32) / 255.0
+        expected_image = load_numpy(
+            "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape.npy"
+        )

         model_id = "CompVis/stable-diffusion-v1-4"
         pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
@@ -508,19 +506,17 @@ def test_stable_diffusion_img2img_pipeline_k_lms(self):

         assert image.shape == (512, 768, 3)
         # img2img is flaky across GPUs even in fp32, so using MAE here
-        assert np.abs(expected_image - image).mean() < 1e-2
+        assert np.abs(expected_image - image).mean() < 1e-3

     def test_stable_diffusion_img2img_pipeline_k_lms(self):
         init_image = load_image(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
             "/img2img/sketch-mountains-input.jpg"
         )
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
-            "/img2img/fantasy_landscape_k_lms.png"
-        )
         init_image = init_image.resize((768, 512))
-        expected_image = np.array(expected_image, dtype=np.float32) / 255.0
+        expected_image = load_numpy(
+            "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape_k_lms.npy"
+        )

         model_id = "CompVis/stable-diffusion-v1-4"
         lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler")
@@ -548,7 +544,7 @@ def test_stable_diffusion_img2img_pipeline_k_lms(self):

         assert image.shape == (512, 768, 3)
         # img2img is flaky across GPUs even in fp32, so using MAE here
-        assert np.abs(expected_image - image).mean() < 1e-2
+        assert np.abs(expected_image - image).mean() < 1e-3

     def test_stable_diffusion_img2img_intermediate_state(self):
         number_of_steps = 0
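The img2img slow tests keep the mean absolute error rather than the max because, as the inline comment notes, the pipeline is flaky across GPUs even in fp32: a few outlier pixels can blow past a max-abs threshold while the image as a whole still matches the reference. A synthetic sketch of why MAE at 1e-3 is the more robust check here (the noise model is illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
expected = rng.random((512, 768, 3), dtype=np.float32)

# Simulate cross-GPU nondeterminism: tiny noise everywhere,
# plus a handful of pixels that disagree badly.
image = expected + rng.normal(0.0, 1e-4, expected.shape).astype(np.float32)
image.reshape(-1)[:10] += 0.05

print(np.abs(expected - image).max())   # ~5e-2: a max-abs check at 1e-3 would fail
print(np.abs(expected - image).mean())  # ~8e-5: the MAE check at 1e-3 passes
```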