diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index 1fd03d9d16a1..e8c859689897 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -123,16 +123,26 @@ def move_weight_functions(m, device): return memory class LowVramPatch: - def __init__(self, key, patches): + def __init__(self, key, patches, convert_func=None, set_func=None): self.key = key self.patches = patches + self.convert_func = convert_func + self.set_func = set_func + def __call__(self, weight): + if self.convert_func is not None: + weight = self.convert_func(weight.to(dtype=torch.float32, copy=True), inplace=True) + intermediate_dtype = weight.dtype - if intermediate_dtype not in [torch.float32, torch.float16, torch.bfloat16]: #intermediate_dtype has to be one that is supported in math ops + if self.set_func is None and intermediate_dtype not in [torch.float32, torch.float16, torch.bfloat16]: #intermediate_dtype has to be one that is supported in math ops intermediate_dtype = torch.float32 return comfy.float.stochastic_rounding(comfy.lora.calculate_weight(self.patches[self.key], weight.to(intermediate_dtype), self.key, intermediate_dtype=intermediate_dtype), weight.dtype, seed=string_to_seed(self.key)) - return comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=intermediate_dtype) + out = comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=intermediate_dtype) + if self.set_func is not None: + return self.set_func(out, seed=string_to_seed(self.key), return_weight=True) + else: + return out def get_key_weight(model, key): set_func = None @@ -657,13 +667,15 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False if force_patch_weights: self.patch_weight_to_device(weight_key) else: - m.weight_function = [LowVramPatch(weight_key, self.patches)] + _, set_func, convert_func = get_key_weight(self.model, weight_key) + m.weight_function = [LowVramPatch(weight_key, self.patches, convert_func, set_func)] patch_counter += 1 if bias_key in self.patches: if force_patch_weights: self.patch_weight_to_device(bias_key) else: - m.bias_function = [LowVramPatch(bias_key, self.patches)] + _, set_func, convert_func = get_key_weight(self.model, bias_key) + m.bias_function = [LowVramPatch(bias_key, self.patches, convert_func, set_func)] patch_counter += 1 cast_weight = True @@ -825,10 +837,12 @@ def partially_unload(self, device_to, memory_to_free=0): module_mem += move_weight_functions(m, device_to) if lowvram_possible: if weight_key in self.patches: - m.weight_function.append(LowVramPatch(weight_key, self.patches)) + _, set_func, convert_func = get_key_weight(self.model, weight_key) + m.weight_function.append(LowVramPatch(weight_key, self.patches, convert_func, set_func)) patch_counter += 1 if bias_key in self.patches: - m.bias_function.append(LowVramPatch(bias_key, self.patches)) + _, set_func, convert_func = get_key_weight(self.model, bias_key) + m.bias_function.append(LowVramPatch(bias_key, self.patches, convert_func, set_func)) patch_counter += 1 cast_weight = True diff --git a/comfy/ops.py b/comfy/ops.py index 9d7dedd374b6..2415c96bf486 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -416,8 +416,10 @@ def convert_weight(self, weight, inplace=False, **kwargs): else: return weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype) - def set_weight(self, weight, inplace_update=False, seed=None, **kwargs): + def set_weight(self, weight, inplace_update=False, seed=None, return_weight=False, **kwargs): weight = comfy.float.stochastic_rounding(weight / self.scale_weight.to(device=weight.device, dtype=weight.dtype), self.weight.dtype, seed=seed) + if return_weight: + return weight if inplace_update: self.weight.data.copy_(weight) else: