@@ -1139,7 +1139,7 @@ def load_lora_into_text_encoder(cls, state_dict, network_alpha, text_encoder, lo
                         f"{name}.out_proj.lora_linear_layer.down.weight"
                     ] = text_encoder_lora_state_dict.pop(f"{name}.to_out_lora.down.weight")
 
-            if text_encoder_lora_state_dict:
+            if state_dict_aux:
                 for name, _ in text_encoder_aux_modules(text_encoder):
                     for direction in ["up", "down"]:
                         for layer in ["fc1", "fc2"]:
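The rename in the context lines above follows the usual pop-and-reassign pattern for remapping legacy state-dict keys in place. A minimal, self-contained sketch of that pattern (the helper name and exact key layout are illustrative, not the library's real checkpoint schema):

# Sketch: remap legacy LoRA keys in place with dict.pop().
# The suffix mapping mirrors the rename visible in the hunk above; the helper
# itself is hypothetical and not part of the diffusers API.
def remap_legacy_keys(state_dict):
    legacy_to_new = {
        "to_out_lora.down.weight": "out_proj.lora_linear_layer.down.weight",
        "to_out_lora.up.weight": "out_proj.lora_linear_layer.up.weight",
    }
    for key in list(state_dict):  # snapshot the keys: the dict is mutated below
        for old_suffix, new_suffix in legacy_to_new.items():
            if key.endswith(old_suffix):
                new_key = key[: -len(old_suffix)] + new_suffix
                state_dict[new_key] = state_dict.pop(key)
    return state_dict

sd = {"layers.0.self_attn.to_out_lora.down.weight": "tensor"}
assert "layers.0.self_attn.out_proj.lora_linear_layer.down.weight" in remap_legacy_keys(sd)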
@@ -1186,13 +1186,24 @@ def _remove_text_encoder_monkey_patch_classmethod(cls, text_encoder):
                 attn_module.v_proj = attn_module.v_proj.regular_linear_layer
                 attn_module.out_proj = attn_module.out_proj.regular_linear_layer
 
-        for _, aux_module in text_encoder_aux_modules(text_encoder):
-            if isinstance(aux_module.fc1, PatchedLoraProjection):
-                aux_module.fc1 = aux_module.fc1.regular_linear_layer
-                aux_module.fc2 = aux_module.fc2.regular_linear_layer
+        if getattr(text_encoder, "aux_state_dict_populated", False):
+            for _, aux_module in text_encoder_aux_modules(text_encoder):
+                if isinstance(aux_module.fc1, PatchedLoraProjection):
+                    aux_module.fc1 = aux_module.fc1.regular_linear_layer
+                    aux_module.fc2 = aux_module.fc2.regular_linear_layer
+
+            text_encoder.aux_state_dict_populated = False
 
     @classmethod
-    def _modify_text_encoder(cls, text_encoder, lora_scale=1, network_alpha=None, rank=4, dtype=None):
+    def _modify_text_encoder(
+        cls,
+        text_encoder,
+        lora_scale=1,
+        network_alpha=None,
+        rank=4,
+        dtype=None,
+        patch_aux=False,
+    ):
         r"""
         Monkey-patches the forward passes of attention modules of the text encoder.
         """
@@ -1223,12 +1234,19 @@ def _modify_text_encoder(cls, text_encoder, lora_scale=1, network_alpha=None, ra
             )
             lora_parameters.extend(attn_module.out_proj.lora_linear_layer.parameters())
 
-        for _, aux_module in text_encoder_aux_modules(text_encoder):
-            aux_module.fc1 = PatchedLoraProjection(aux_module.fc1, lora_scale, network_alpha, rank=rank, dtype=dtype)
-            lora_parameters.extend(aux_module.fc1.lora_linear_layer.parameters())
+        if patch_aux:
+            for _, aux_module in text_encoder_aux_modules(text_encoder):
+                aux_module.fc1 = PatchedLoraProjection(
+                    aux_module.fc1, lora_scale, network_alpha, rank=rank, dtype=dtype
+                )
+                lora_parameters.extend(aux_module.fc1.lora_linear_layer.parameters())
+
+                aux_module.fc2 = PatchedLoraProjection(
+                    aux_module.fc2, lora_scale, network_alpha, rank=rank, dtype=dtype
+                )
+                lora_parameters.extend(aux_module.fc2.lora_linear_layer.parameters())
 
-            aux_module.fc2 = PatchedLoraProjection(aux_module.fc2, lora_scale, network_alpha, rank=rank, dtype=dtype)
-            lora_parameters.extend(aux_module.fc2.lora_linear_layer.parameters())
+            text_encoder.aux_state_dict_populated = True
 
         return lora_parameters
 
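Taken together, the patch_aux argument and the aux_state_dict_populated attribute form a simple handshake: patching the auxiliary (MLP) modules records a flag on the text encoder, and the unpatch path only touches fc1/fc2 when that flag is set, then clears it. A minimal sketch of the same handshake on a toy object (only aux_state_dict_populated mirrors the diff; the other names are hypothetical):

# Sketch of the patch/unpatch handshake used above, on a toy object.
class ToyEncoder:
    def __init__(self):
        self.fc1 = "regular_fc1"
        self.fc2 = "regular_fc2"

def patch_aux_modules(encoder):
    encoder.fc1 = f"patched({encoder.fc1})"
    encoder.fc2 = f"patched({encoder.fc2})"
    encoder.aux_state_dict_populated = True  # remember that aux layers were touched

def unpatch_aux_modules(encoder):
    # Guard with getattr so encoders that were never aux-patched are left alone.
    if getattr(encoder, "aux_state_dict_populated", False):
        encoder.fc1 = "regular_fc1"
        encoder.fc2 = "regular_fc2"
        encoder.aux_state_dict_populated = False

enc = ToyEncoder()
patch_aux_modules(enc)
unpatch_aux_modules(enc)
assert not enc.aux_state_dict_populated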