
Commit 99f2c94

don't cast the trainable lora layers to half precision (#1644)
* don't cast the trainable lora layers to half precision
* quality
Parent: 6401d08
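Why this matters: bfloat16 keeps float32's exponent range but only about 8 mantissa bits, so a small optimizer update applied to a half-precision adapter weight of magnitude ~1 can round away to nothing. Below is a minimal, self-contained sketch of that failure mode (the tensors and values are illustrative, not TRL code):

import torch

# bfloat16 spacing around 1.0 is 2**-8 = 0.0039, so updates much smaller
# than half of that are lost when added to a bf16 weight.
w_fp32 = torch.ones(4, dtype=torch.float32)   # adapter weight kept in fp32
w_bf16 = torch.ones(4, dtype=torch.bfloat16)  # adapter weight cast to bf16 (old behavior)
update = 1e-4                                 # a typical small optimizer step

print(w_fp32 + update)  # tensor([1.0001, 1.0001, 1.0001, 1.0001]) -- step survives
print(w_bf16 + update)  # tensor([1., 1., 1., 1.], dtype=torch.bfloat16) -- step rounds away

Keeping the trainable LoRA layers in float32, as this commit does, avoids losing those small updates.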

File tree

1 file changed (+1, -5 lines)

trl/trainer/utils.py

Lines changed: 1 addition & 5 deletions
@@ -665,12 +665,8 @@ def neftune_post_forward_hook(module, input, output):
 
 
 def peft_module_casting_to_bf16(model):
-    from peft.tuners.tuners_utils import BaseTunerLayer
-
     for name, module in model.named_modules():
-        if isinstance(module, BaseTunerLayer):
-            module = module.to(torch.bfloat16)
-        elif isinstance(module, torch.nn.LayerNorm) or "norm" in name:
+        if isinstance(module, torch.nn.LayerNorm) or "norm" in name:
             module = module.to(torch.float32)
         elif any(x in name for x in ["lm_head", "embed_tokens", "wte", "wpe"]):
             if hasattr(module, "weight"):
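For context, here is a sketch of the helper as it reads after this commit. Only the lines inside the hunk above come from the diff; the `import torch` and the body below `if hasattr(module, "weight"):` fall outside the hunk and are assumptions added to make the sketch self-contained.

import torch

def peft_module_casting_to_bf16(model):
    # Trainable LoRA layers are deliberately left untouched (full precision).
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.LayerNorm) or "norm" in name:
            # Upcast norm layers to float32 for numerical stability.
            module = module.to(torch.float32)
        elif any(x in name for x in ["lm_head", "embed_tokens", "wte", "wpe"]):
            if hasattr(module, "weight"):
                # Assumed continuation (outside the hunk): downcast fp32
                # embedding/head weights to bfloat16.
                if module.weight.dtype == torch.float32:
                    module = module.to(torch.bfloat16)

In a PEFT training setup this would typically be called once after the adapters are attached, leaving adapter weights in float32 while norms and embeddings get the dtypes above.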
