Merged
Changes from all commits
81 commits
91b34ab
throw warning when more than one lora is attempted to be fused.
sayakpaul Aug 30, 2023
c9eeb78
introduce support of lora scale during fusion.
sayakpaul Aug 30, 2023
37692b1
change test name
sayakpaul Aug 30, 2023
cfd19a5
changes
sayakpaul Aug 30, 2023
8a9dad0
change to _lora_scale
sayakpaul Aug 30, 2023
ed3b37a
lora_scale to call whenever applicable.
sayakpaul Aug 30, 2023
b86a8f6
debugging
sayakpaul Aug 30, 2023
80839d6
lora_scale additional.
sayakpaul Aug 30, 2023
2ed1f2a
cross_attention_kwargs
sayakpaul Aug 30, 2023
3967da8
lora_scale -> scale.
sayakpaul Aug 30, 2023
e24fd70
lora_scale fix
sayakpaul Aug 30, 2023
21e765b
lora_scale in patched projection.
sayakpaul Aug 30, 2023
9678ed2
debugging
sayakpaul Aug 30, 2023
acbbb4d
debugging
sayakpaul Aug 30, 2023
0c2dad4
debugging
sayakpaul Aug 30, 2023
6269412
debugging
sayakpaul Aug 30, 2023
cc0c7ec
debugging
sayakpaul Aug 30, 2023
b357ffc
debugging
sayakpaul Aug 30, 2023
8495b43
debugging
sayakpaul Aug 30, 2023
96fc1af
debugging
sayakpaul Aug 30, 2023
0c501d3
debugging
sayakpaul Aug 30, 2023
016d3e9
debugging
sayakpaul Aug 30, 2023
910e96b
debugging
sayakpaul Aug 30, 2023
de159da
debugging
sayakpaul Aug 30, 2023
4ee8dbf
debugging
sayakpaul Aug 30, 2023
cd9ac47
styling.
sayakpaul Aug 30, 2023
1cd983f
debugging
sayakpaul Aug 30, 2023
860a374
debugging
sayakpaul Aug 30, 2023
1b2346c
debugging
sayakpaul Aug 30, 2023
583da5f
debugging
sayakpaul Aug 30, 2023
77f6459
debugging
sayakpaul Aug 30, 2023
ec67361
debugging
sayakpaul Aug 30, 2023
6c9c5dc
debugging
sayakpaul Aug 30, 2023
d7b35d4
debugging
sayakpaul Aug 30, 2023
e601d2b
debugging
sayakpaul Aug 30, 2023
35148d0
debugging
sayakpaul Aug 30, 2023
55efe9c
debugging
sayakpaul Aug 30, 2023
0d7b3df
debugging
sayakpaul Aug 30, 2023
cdc7963
remove unneeded prints.
sayakpaul Aug 30, 2023
2a3e358
remove unneeded prints.
sayakpaul Aug 30, 2023
42c2c0a
assign cross_attention_kwargs.
sayakpaul Aug 30, 2023
98e6eca
debugging
sayakpaul Aug 30, 2023
03abb4c
debugging
sayakpaul Aug 30, 2023
32a175f
debugging
sayakpaul Aug 30, 2023
ef1ad84
debugging
sayakpaul Aug 30, 2023
9a759b9
debugging
sayakpaul Aug 30, 2023
369a53f
debugging
sayakpaul Aug 30, 2023
833fd35
debugging
sayakpaul Aug 30, 2023
d8371ab
debugging
sayakpaul Aug 30, 2023
a5925ab
debugging
sayakpaul Aug 30, 2023
d3d6ab1
debugging
sayakpaul Aug 30, 2023
8c0b584
debugging
sayakpaul Aug 30, 2023
43d6c8d
debugging
sayakpaul Aug 30, 2023
caa8625
debugging
sayakpaul Aug 30, 2023
38cbe46
debugging
sayakpaul Aug 30, 2023
b29e025
debugging
sayakpaul Aug 30, 2023
b275947
debugging
sayakpaul Aug 30, 2023
d8b4bf7
debugging
sayakpaul Aug 30, 2023
a3df6cd
debugging
sayakpaul Aug 30, 2023
265d5f4
debugging
sayakpaul Aug 30, 2023
00167be
Merge branch 'main' into lora-improvements-pt3
sayakpaul Aug 30, 2023
7d34884
clean up.
sayakpaul Aug 30, 2023
9dee7d4
refactor scale retrieval logic a bit.
sayakpaul Aug 31, 2023
f81f77d
fix nonetypw
sayakpaul Aug 31, 2023
92e1194
fix: tests
sayakpaul Aug 31, 2023
4511f48
add more tests
sayakpaul Aug 31, 2023
6667e68
more fixes.
sayakpaul Aug 31, 2023
b941b88
figure out a way to pass lora_scale.
sayakpaul Aug 31, 2023
9705cc2
Apply suggestions from code review
sayakpaul Sep 4, 2023
bebab12
unify the retrieval logic of lora_scale.
sayakpaul Sep 4, 2023
81f7ddf
move adjust_lora_scale_text_encoder to lora.py.
sayakpaul Sep 4, 2023
e2c835c
introduce dynamic adjustment lora scale support to sd
sayakpaul Sep 4, 2023
ca48db6
Merge branch 'main' into lora-improvements-pt3
sayakpaul Sep 4, 2023
f2026ac
fix up copies
sayakpaul Sep 4, 2023
7444896
Empty-Commit
sayakpaul Sep 4, 2023
e60f450
add: test to check fusion equivalence on different scales.
sayakpaul Sep 4, 2023
bf1052b
handle lora fusion warning.
sayakpaul Sep 4, 2023
4733384
make lora smaller
patrickvonplaten Sep 4, 2023
dabdd58
make lora smaller
patrickvonplaten Sep 4, 2023
51824c7
make lora smaller
patrickvonplaten Sep 4, 2023
972c8e8
Merge branch 'main' into lora-improvements-pt3
patrickvonplaten Sep 4, 2023
45 changes: 31 additions & 14 deletions src/diffusers/loaders.py
@@ -95,7 +95,7 @@ def state_dict(self, *args, destination=None, prefix="", keep_vars=False):

return super().state_dict(*args, destination=destination, prefix=prefix, keep_vars=keep_vars)

def _fuse_lora(self):
def _fuse_lora(self, lora_scale=1.0):
if self.lora_linear_layer is None:
return

@@ -108,7 +108,7 @@ def _fuse_lora(self):
if self.lora_linear_layer.network_alpha is not None:
w_up = w_up * self.lora_linear_layer.network_alpha / self.lora_linear_layer.rank

fused_weight = w_orig + torch.bmm(w_up[None, :], w_down[None, :])[0]
fused_weight = w_orig + (lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0])
self.regular_linear_layer.weight.data = fused_weight.to(device=device, dtype=dtype)

# we can drop the lora layer now
@@ -117,6 +117,7 @@ def _fuse_lora(self):
# offload the up and down matrices to CPU to not blow the memory
self.w_up = w_up.cpu()
self.w_down = w_down.cpu()
self.lora_scale = lora_scale

def _unfuse_lora(self):
if not (hasattr(self, "w_up") and hasattr(self, "w_down")):
@@ -128,16 +129,19 @@ def _unfuse_lora(self):
w_up = self.w_up.to(device=device).float()
w_down = self.w_down.to(device).float()

unfused_weight = fused_weight.float() - torch.bmm(w_up[None, :], w_down[None, :])[0]
unfused_weight = fused_weight.float() - (self.lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0])
self.regular_linear_layer.weight.data = unfused_weight.to(device=device, dtype=dtype)

self.w_up = None
self.w_down = None

def forward(self, input):
# print(f"{self.__class__.__name__} has a lora_scale of {self.lora_scale}")
if self.lora_scale is None:
self.lora_scale = 1.0
if self.lora_linear_layer is None:
return self.regular_linear_layer(input)
return self.regular_linear_layer(input) + self.lora_scale * self.lora_linear_layer(input)
return self.regular_linear_layer(input) + (self.lora_scale * self.lora_linear_layer(input))


def text_encoder_attn_modules(text_encoder):
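Taken together, these hunks make the fusion arithmetic reversible at any strength: fusing adds a scaled low-rank product to the frozen weight, and the scale is stored so that unfusing subtracts exactly the same term. A minimal standalone sketch of that arithmetic (tensor shapes are made up for illustration and are not taken from the PR):

import torch

w_orig = torch.randn(32, 32)   # frozen linear weight
w_up = torch.randn(32, 4)      # LoRA up-projection
w_down = torch.randn(4, 32)    # LoRA down-projection
lora_scale = 0.5

# Fuse: bake the scaled low-rank update into the base weight.
fused = w_orig + lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0]

# Unfuse: subtract the same scaled term, which is why the scale is
# remembered on the module (self.lora_scale) at fuse time.
unfused = fused - lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0]

assert torch.allclose(unfused, w_orig, atol=1e-5)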
@@ -576,12 +580,13 @@ def save_function(weights, filename):
save_function(state_dict, os.path.join(save_directory, weight_name))
logger.info(f"Model weights saved in {os.path.join(save_directory, weight_name)}")

def fuse_lora(self):
def fuse_lora(self, lora_scale=1.0):
self.lora_scale = lora_scale
self.apply(self._fuse_lora_apply)

def _fuse_lora_apply(self, module):
if hasattr(module, "_fuse_lora"):
module._fuse_lora()
module._fuse_lora(self.lora_scale)

def unfuse_lora(self):
self.apply(self._unfuse_lora_apply)
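At the UNet level, fuse_lora simply stores the scale and relies on nn.Module.apply to hand it to every submodule that defines _fuse_lora. A standalone sketch of that traversal pattern, using toy classes rather than the real diffusers modules:

import torch.nn as nn

class ToyLoRALayer(nn.Module):
    def __init__(self):
        super().__init__()
        self.fused_with = None

    def _fuse_lora(self, lora_scale=1.0):
        # In the real code this folds the scaled LoRA weights into the layer.
        self.fused_with = lora_scale

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.block = nn.Sequential(ToyLoRALayer(), nn.ReLU(), ToyLoRALayer())

    def fuse_lora(self, lora_scale=1.0):
        self.lora_scale = lora_scale
        self.apply(self._fuse_lora_apply)

    def _fuse_lora_apply(self, module):
        if hasattr(module, "_fuse_lora"):
            module._fuse_lora(self.lora_scale)

model = ToyModel()
model.fuse_lora(0.5)
print([m.fused_with for m in model.modules() if isinstance(m, ToyLoRALayer)])  # [0.5, 0.5]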
@@ -924,6 +929,7 @@ class LoraLoaderMixin:
"""
text_encoder_name = TEXT_ENCODER_NAME
unet_name = UNET_NAME
num_fused_loras = 0

def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
"""
@@ -1807,7 +1813,7 @@ def unload_lora_weights(self):
# Safe to call the following regardless of LoRA.
self._remove_text_encoder_monkey_patch()

def fuse_lora(self, fuse_unet: bool = True, fuse_text_encoder: bool = True):
def fuse_lora(self, fuse_unet: bool = True, fuse_text_encoder: bool = True, lora_scale: float = 1.0):
r"""
Fuses the LoRA parameters into the original parameters of the corresponding blocks.

@@ -1822,22 +1828,31 @@ def fuse_lora(self, fuse_unet: bool = True, fuse_text_encoder: bool = True):
fuse_text_encoder (`bool`, defaults to `True`):
Whether to fuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the
LoRA parameters then it won't have any effect.
lora_scale (`float`, defaults to 1.0):
Controls how much to influence the outputs with the LoRA parameters.
"""
if fuse_unet or fuse_text_encoder:
self.num_fused_loras += 1
if self.num_fused_loras > 1:
logger.warn(
"The current API is supported for operating with a single LoRA file. You are trying to load and fuse more than one LoRA which is not well-supported.",
)

if fuse_unet:
self.unet.fuse_lora()
self.unet.fuse_lora(lora_scale)

def fuse_text_encoder_lora(text_encoder):
for _, attn_module in text_encoder_attn_modules(text_encoder):
if isinstance(attn_module.q_proj, PatchedLoraProjection):
attn_module.q_proj._fuse_lora()
attn_module.k_proj._fuse_lora()
attn_module.v_proj._fuse_lora()
attn_module.out_proj._fuse_lora()
attn_module.q_proj._fuse_lora(lora_scale)
attn_module.k_proj._fuse_lora(lora_scale)
attn_module.v_proj._fuse_lora(lora_scale)
attn_module.out_proj._fuse_lora(lora_scale)

for _, mlp_module in text_encoder_mlp_modules(text_encoder):
if isinstance(mlp_module.fc1, PatchedLoraProjection):
mlp_module.fc1._fuse_lora()
mlp_module.fc2._fuse_lora()
mlp_module.fc1._fuse_lora(lora_scale)
mlp_module.fc2._fuse_lora(lora_scale)

if fuse_text_encoder:
if hasattr(self, "text_encoder"):
@@ -1884,6 +1899,8 @@ def unfuse_text_encoder_lora(text_encoder):
if hasattr(self, "text_encoder_2"):
unfuse_text_encoder_lora(self.text_encoder_2)

self.num_fused_loras -= 1


class FromSingleFileMixin:
"""
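With the pieces above in place, the fusion strength becomes a user-facing knob on the pipeline mixin. A hedged usage sketch, assuming a Stable Diffusion checkpoint and a single LoRA file (the identifiers and file path below are placeholders, not taken from this PR):

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Load one LoRA and bake it into the UNet and text encoder at 70% strength.
# Fusing a second LoRA afterwards would trip the new num_fused_loras warning.
pipe.load_lora_weights("path/to/lora.safetensors")
pipe.fuse_lora(lora_scale=0.7)

image = pipe("an astronaut riding a horse").images[0]

# Undo the fusion; the stored scale guarantees the original weights return.
pipe.unfuse_lora()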
33 changes: 21 additions & 12 deletions src/diffusers/models/attention.py
@@ -177,7 +177,7 @@ def forward(
class_labels: Optional[torch.LongTensor] = None,
):
# Notice that normalization is always applied before the real computation in the following blocks.
# 1. Self-Attention
# 0. Self-Attention
if self.use_ada_layer_norm:
norm_hidden_states = self.norm1(hidden_states, timestep)
elif self.use_ada_layer_norm_zero:
@@ -187,7 +187,10 @@ def forward(
else:
norm_hidden_states = self.norm1(hidden_states)

# 0. Prepare GLIGEN inputs
# 1. Retrieve lora scale.
lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0

# 2. Prepare GLIGEN inputs
cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
gligen_kwargs = cross_attention_kwargs.pop("gligen", None)

@@ -201,12 +204,12 @@ def forward(
attn_output = gate_msa.unsqueeze(1) * attn_output
hidden_states = attn_output + hidden_states

# 1.5 GLIGEN Control
# 2.5 GLIGEN Control
if gligen_kwargs is not None:
hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
# 1.5 ends
# 2.5 ends

# 2. Cross-Attention
# 3. Cross-Attention
if self.attn2 is not None:
norm_hidden_states = (
self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
@@ -220,7 +223,7 @@ def forward(
)
hidden_states = attn_output + hidden_states

# 3. Feed-forward
# 4. Feed-forward
norm_hidden_states = self.norm3(hidden_states)

if self.use_ada_layer_norm_zero:
@@ -235,11 +238,14 @@

num_chunks = norm_hidden_states.shape[self._chunk_dim] // self._chunk_size
ff_output = torch.cat(
[self.ff(hid_slice) for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)],
[
self.ff(hid_slice, scale=lora_scale)
for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)
],
dim=self._chunk_dim,
)
else:
ff_output = self.ff(norm_hidden_states)
ff_output = self.ff(norm_hidden_states, scale=lora_scale)

if self.use_ada_layer_norm_zero:
ff_output = gate_mlp.unsqueeze(1) * ff_output
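For LoRAs that are not fused, the strength still arrives per call through cross_attention_kwargs, and the retrieval above now forwards it to the feed-forward path as well. A small standalone sketch of that retrieval logic, assuming only the dict-or-None convention used by BasicTransformerBlock.forward:

from typing import Any, Dict, Optional

def retrieve_lora_scale(cross_attention_kwargs: Optional[Dict[str, Any]]) -> float:
    # Mirrors the logic added above: default to 1.0 when no kwargs
    # (or no "scale" entry) are supplied.
    if cross_attention_kwargs is None:
        return 1.0
    return cross_attention_kwargs.get("scale", 1.0)

assert retrieve_lora_scale(None) == 1.0
assert retrieve_lora_scale({"scale": 0.5}) == 0.5
assert retrieve_lora_scale({"gligen": {}}) == 1.0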
@@ -295,9 +301,12 @@ def __init__(
if final_dropout:
self.net.append(nn.Dropout(dropout))

def forward(self, hidden_states):
def forward(self, hidden_states, scale: float = 1.0):
for module in self.net:
hidden_states = module(hidden_states)
if isinstance(module, (LoRACompatibleLinear, GEGLU)):
hidden_states = module(hidden_states, scale)
else:
hidden_states = module(hidden_states)
return hidden_states


@@ -342,8 +351,8 @@ def gelu(self, gate):
# mps: gelu is not implemented for float16
return F.gelu(gate.to(dtype=torch.float32)).to(dtype=gate.dtype)

def forward(self, hidden_states):
hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1)
def forward(self, hidden_states, scale: float = 1.0):
hidden_states, gate = self.proj(hidden_states, scale).chunk(2, dim=-1)
return hidden_states * self.gelu(gate)


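The FeedForward and GEGLU changes dispatch on layer type so the scale only reaches modules that can use it. A simplified standalone sketch with a stand-in class (the real LoRACompatibleLinear and GEGLU live in diffusers.models; the stand-in below is hypothetical):

import torch
import torch.nn as nn

class ScaleAwareLinear(nn.Linear):
    """Stand-in for LoRACompatibleLinear: accepts an extra `scale` argument."""
    def forward(self, hidden_states, scale: float = 1.0):
        # A real LoRA-compatible layer would add scale * lora(hidden_states).
        return super().forward(hidden_states)

def run_ff(net: nn.ModuleList, hidden_states: torch.Tensor, scale: float = 1.0):
    # Same dispatch as FeedForward.forward above: pass the scale only to
    # layers that accept it, call everything else unchanged.
    for module in net:
        if isinstance(module, ScaleAwareLinear):
            hidden_states = module(hidden_states, scale)
        else:
            hidden_states = module(hidden_states)
    return hidden_states

net = nn.ModuleList([ScaleAwareLinear(8, 32), nn.GELU(), ScaleAwareLinear(32, 8)])
print(run_ff(net, torch.randn(2, 4, 8), scale=0.5).shape)  # torch.Size([2, 4, 8])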
44 changes: 22 additions & 22 deletions src/diffusers/models/attention_processor.py
@@ -570,15 +570,15 @@ def __call__(
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

query = attn.to_q(hidden_states, lora_scale=scale)
query = attn.to_q(hidden_states, scale=scale)

if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

key = attn.to_k(encoder_hidden_states, lora_scale=scale)
value = attn.to_v(encoder_hidden_states, lora_scale=scale)
key = attn.to_k(encoder_hidden_states, scale=scale)
value = attn.to_v(encoder_hidden_states, scale=scale)

query = attn.head_to_batch_dim(query)
key = attn.head_to_batch_dim(key)
@@ -589,7 +589,7 @@ def __call__(
hidden_states = attn.batch_to_head_dim(hidden_states)

# linear proj
hidden_states = attn.to_out[0](hidden_states, lora_scale=scale)
hidden_states = attn.to_out[0](hidden_states, scale=scale)
# dropout
hidden_states = attn.to_out[1](hidden_states)

@@ -722,17 +722,17 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a

hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

query = attn.to_q(hidden_states, lora_scale=scale)
query = attn.to_q(hidden_states, scale=scale)
query = attn.head_to_batch_dim(query)

encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states, lora_scale=scale)
encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states, lora_scale=scale)
encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states, scale=scale)
encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states, scale=scale)
encoder_hidden_states_key_proj = attn.head_to_batch_dim(encoder_hidden_states_key_proj)
encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj)

if not attn.only_cross_attention:
key = attn.to_k(hidden_states, lora_scale=scale)
value = attn.to_v(hidden_states, lora_scale=scale)
key = attn.to_k(hidden_states, scale=scale)
value = attn.to_v(hidden_states, scale=scale)
key = attn.head_to_batch_dim(key)
value = attn.head_to_batch_dim(value)
key = torch.cat([encoder_hidden_states_key_proj, key], dim=1)
@@ -746,7 +746,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
hidden_states = attn.batch_to_head_dim(hidden_states)

# linear proj
hidden_states = attn.to_out[0](hidden_states, lora_scale=scale)
hidden_states = attn.to_out[0](hidden_states, scale=scale)
# dropout
hidden_states = attn.to_out[1](hidden_states)

@@ -782,7 +782,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a

hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

query = attn.to_q(hidden_states, lora_scale=scale)
query = attn.to_q(hidden_states, scale=scale)
query = attn.head_to_batch_dim(query, out_dim=4)

encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states)
@@ -791,8 +791,8 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj, out_dim=4)

if not attn.only_cross_attention:
key = attn.to_k(hidden_states, lora_scale=scale)
value = attn.to_v(hidden_states, lora_scale=scale)
key = attn.to_k(hidden_states, scale=scale)
value = attn.to_v(hidden_states, scale=scale)
key = attn.head_to_batch_dim(key, out_dim=4)
value = attn.head_to_batch_dim(value, out_dim=4)
key = torch.cat([encoder_hidden_states_key_proj, key], dim=2)
@@ -809,7 +809,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, residual.shape[1])

# linear proj
hidden_states = attn.to_out[0](hidden_states, lora_scale=scale)
hidden_states = attn.to_out[0](hidden_states, scale=scale)
# dropout
hidden_states = attn.to_out[1](hidden_states)

@@ -937,15 +937,15 @@ def __call__(
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

query = attn.to_q(hidden_states, lora_scale=scale)
query = attn.to_q(hidden_states, scale=scale)

if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

key = attn.to_k(encoder_hidden_states, lora_scale=scale)
value = attn.to_v(encoder_hidden_states, lora_scale=scale)
key = attn.to_k(encoder_hidden_states, scale=scale)
value = attn.to_v(encoder_hidden_states, scale=scale)

query = attn.head_to_batch_dim(query).contiguous()
key = attn.head_to_batch_dim(key).contiguous()
@@ -958,7 +958,7 @@ def __call__(
hidden_states = attn.batch_to_head_dim(hidden_states)

# linear proj
hidden_states = attn.to_out[0](hidden_states, lora_scale=scale)
hidden_states = attn.to_out[0](hidden_states, scale=scale)
# dropout
hidden_states = attn.to_out[1](hidden_states)

@@ -1015,15 +1015,15 @@ def __call__(
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

query = attn.to_q(hidden_states, lora_scale=scale)
query = attn.to_q(hidden_states, scale=scale)

if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

key = attn.to_k(encoder_hidden_states, lora_scale=scale)
value = attn.to_v(encoder_hidden_states, lora_scale=scale)
key = attn.to_k(encoder_hidden_states, scale=scale)
value = attn.to_v(encoder_hidden_states, scale=scale)

inner_dim = key.shape[-1]
head_dim = inner_dim // attn.heads
@@ -1043,7 +1043,7 @@ def __call__(
hidden_states = hidden_states.to(query.dtype)

# linear proj
hidden_states = attn.to_out[0](hidden_states, lora_scale=scale)
hidden_states = attn.to_out[0](hidden_states, scale=scale)
# dropout
hidden_states = attn.to_out[1](hidden_states)

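Every processor in this file now calls the attention projections with scale= instead of lora_scale=, matching the keyword the LoRA-compatible layers expose. A hedged sketch of what such a projection looks like (a simplified stand-in, not the actual diffusers implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyLoRALinear(nn.Linear):
    """Simplified stand-in: a base linear layer plus a scaled low-rank branch."""
    def __init__(self, in_features, out_features, rank=4):
        super().__init__(in_features, out_features)
        self.lora_down = nn.Linear(in_features, rank, bias=False)
        self.lora_up = nn.Linear(rank, out_features, bias=False)

    def forward(self, hidden_states, scale: float = 1.0):
        # `scale` weights the low-rank update without touching the base
        # projection, which is what the scale=scale call sites above rely on.
        base = super().forward(hidden_states)
        return base + scale * self.lora_up(self.lora_down(hidden_states))

to_q = TinyLoRALinear(64, 64)
hidden = torch.randn(1, 16, 64)
# With scale=0.0 the LoRA branch vanishes and only the base projection remains.
assert torch.allclose(to_q(hidden, scale=0.0), F.linear(hidden, to_q.weight, to_q.bias))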