Commit df1b065

major fixes.

1 parent 31d759e · commit df1b065

7 files changed: +337 -296 lines changed


src/diffusers/loaders/peft.py

Lines changed: 1 addition & 0 deletions
@@ -32,6 +32,7 @@
     "UNet2DConditionModel": _maybe_expand_lora_scales,
     "UNetMotionModel": _maybe_expand_lora_scales,
     "SD3Transformer2DModel": lambda model_cls, weights: weights,
+    "FluxTransformer2DModel": lambda model_cls, weights: weights,
 }
3738
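The hunk above registers FluxTransformer2DModel in the per-model mapping that decides how user-supplied LoRA scales are expanded: transformer models pass the scales through unchanged, while UNet models go through _maybe_expand_lora_scales. Below is a minimal sketch of how such a mapping is typically consumed; the dict and function names are illustrative, not the actual identifiers in peft.py.

# Illustrative sketch only; not the actual code in src/diffusers/loaders/peft.py.
# The keys mirror the hunk above; the helper name is an assumption.
scale_expansion_fns = {
    "SD3Transformer2DModel": lambda model_cls, weights: weights,
    "FluxTransformer2DModel": lambda model_cls, weights: weights,  # new entry: identity, no per-block expansion
}

def expand_lora_scales(model, weights):
    # Look up the expansion callable by class name; fall back to identity when
    # a model class has no registered expansion function.
    expand_fn = scale_expansion_fns.get(type(model).__name__, lambda cls, w: w)
    return expand_fn(type(model), weights)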

src/diffusers/models/transformers/transformer_flux.py

Lines changed: 0 additions & 1 deletion
@@ -373,7 +373,6 @@ def forward(
         )
         encoder_hidden_states = self.context_embedder(encoder_hidden_states)

-        print(f"{txt_ids.shape=}, {img_ids.shape=}")
         ids = torch.cat((txt_ids, img_ids), dim=1)
         image_rotary_emb = self.pos_embed(ids)

tests/lora/test_lora_layers_flux.py

Lines changed: 37 additions & 1 deletion
@@ -15,8 +15,11 @@
 import sys
 import unittest

+import torch
+from transformers import AutoTokenizer, CLIPTextModel, CLIPTokenizer, T5EncoderModel
+
 from diffusers import FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
-from diffusers.utils.testing_utils import is_peft_available, require_peft_backend
+from diffusers.utils.testing_utils import floats_tensor, is_peft_available, require_peft_backend


 if is_peft_available():

@@ -32,6 +35,7 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = FluxPipeline
     scheduler_cls = FlowMatchEulerDiscreteScheduler()
     scheduler_kwargs = {}
+    uses_flow_matching = True
     transformer_kwargs = {
         "patch_size": 1,
         "in_channels": 4,

@@ -57,3 +61,35 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
         "shift_factor": 0.0609,
         "scaling_factor": 1.5035,
     }
+    has_two_text_encoders = True
+    tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+    tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
+    text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+    text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
+
+    @property
+    def output_shape(self):
+        return (1, 8, 8, 3)
+
+    def get_dummy_inputs(self, with_generator=True):
+        batch_size = 1
+        sequence_length = 10
+        num_channels = 4
+        sizes = (32, 32)
+
+        generator = torch.manual_seed(0)
+        noise = floats_tensor((batch_size, num_channels) + sizes)
+        input_ids = torch.randint(1, sequence_length, size=(batch_size, sequence_length), generator=generator)
+
+        pipeline_inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "num_inference_steps": 4,
+            "guidance_scale": 0.0,
+            "height": 8,
+            "width": 8,
+            "output_type": "np",
+        }
+        if with_generator:
+            pipeline_inputs.update({"generator": generator})
+
+        return noise, input_ids, pipeline_inputs

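For orientation, get_dummy_inputs returns a latent-shaped noise tensor, dummy token ids, and the keyword arguments that drive the pipeline; with height=8, width=8, and output_type="np" the decoded batch should match output_shape == (1, 8, 8, 3). Here is a standalone reproduction of the input side; the pipeline call itself is only sketched, since the tiny FluxPipeline is assembled by the shared mixin, which is not part of this diff.

# Reproduces the fixture above outside the test class; `pipe` is assumed to be
# the tiny FluxPipeline built by the shared LoRA test mixin (not shown here).
import torch
from diffusers.utils.testing_utils import floats_tensor

generator = torch.manual_seed(0)
noise = floats_tensor((1, 4, 32, 32))  # latent-like dummy tensor: (batch, channels, height, width)
input_ids = torch.randint(1, 10, size=(1, 10), generator=generator)

pipeline_inputs = {
    "prompt": "A painting of a squirrel eating a burger",
    "num_inference_steps": 4,
    "guidance_scale": 0.0,
    "height": 8,
    "width": 8,
    "output_type": "np",  # NumPy output so shapes can be compared with output_shape
    "generator": generator,
}
# images = pipe(**pipeline_inputs).images
# assert images.shape == (1, 8, 8, 3)
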
tests/lora/test_lora_layers_sd.py

Lines changed: 7 additions & 0 deletions
@@ -22,6 +22,7 @@
 from huggingface_hub import hf_hub_download
 from huggingface_hub.repocard import RepoCard
 from safetensors.torch import load_file
+from transformers import CLIPTextModel, CLIPTokenizer

 from diffusers import (
     AutoPipelineForImage2Image,

@@ -80,6 +81,12 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
         "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
         "latent_channels": 4,
     }
+    text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+    tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+
+    @property
+    def output_shape(self):
+        return (1, 64, 64, 3)

     def setUp(self):
         super().setUp()

tests/lora/test_lora_layers_sd3.py

Lines changed: 13 additions & 0 deletions
@@ -15,6 +15,8 @@
 import sys
 import unittest

+from transformers import AutoTokenizer, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
+
 from diffusers import FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
 from diffusers.utils.testing_utils import is_peft_available, require_peft_backend, require_torch_gpu, torch_device

@@ -32,6 +34,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = StableDiffusion3Pipeline
     scheduler_cls = FlowMatchEulerDiscreteScheduler()
     scheduler_kwargs = {}
+    uses_flow_matching = True
     transformer_kwargs = {
         "sample_size": 32,
         "patch_size": 1,

@@ -59,6 +62,16 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
         "scaling_factor": 1.5035,
     }
     has_three_text_encoders = True
+    tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+    tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+    tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
+    text_encoder = CLIPTextModelWithProjection.from_pretrained("hf-internal-testing/tiny-sd3-text_encoder")
+    text_encoder_2 = CLIPTextModelWithProjection.from_pretrained("hf-internal-testing/tiny-sd3-text_encoder-2")
+    text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
+
+    @property
+    def output_shape(self):
+        return (1, 32, 32, 3)

     @require_torch_gpu
     def test_sd3_lora(self):

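As with the Flux class above, these attributes supply the components that the shared mixin wires into the pipeline; for SD3 that means three tokenizer/text-encoder pairs. A rough sketch of how they map onto StableDiffusion3Pipeline's component slots follows (illustrative only; the transformer, VAE, and scheduler assembly is elided here).

# Illustrative mapping of the tiny checkpoints above onto StableDiffusion3Pipeline's
# three text-encoder slots; this is not code from the commit.
from transformers import AutoTokenizer, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel

text_components = {
    "tokenizer": CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip"),
    "text_encoder": CLIPTextModelWithProjection.from_pretrained("hf-internal-testing/tiny-sd3-text_encoder"),
    "tokenizer_2": CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip"),
    "text_encoder_2": CLIPTextModelWithProjection.from_pretrained("hf-internal-testing/tiny-sd3-text_encoder-2"),
    "tokenizer_3": AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5"),
    "text_encoder_3": T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5"),
}
# pipe = StableDiffusion3Pipeline(transformer=..., scheduler=..., vae=..., **text_components)
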
tests/lora/test_lora_layers_sdxl.py

Lines changed: 9 additions & 0 deletions
@@ -22,6 +22,7 @@
 import numpy as np
 import torch
 from packaging import version
+from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

 from diffusers import (
     ControlNetModel,

@@ -89,6 +90,14 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
         "latent_channels": 4,
         "sample_size": 128,
     }
+    text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+    tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+    text_encoder_2 = CLIPTextModelWithProjection.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+    tokenizer_2 = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
+
+    @property
+    def output_shape(self):
+        return (1, 64, 64, 3)

     def setUp(self):
         super().setUp()
