import json
import os
import re
import struct
import sys
from typing import Any, BinaryIO, Dict, Sequence

import numpy as np
import torch

from convert import DATA_TYPE_TO_FTYPE, NUMPY_TYPE_TO_DATA_TYPE

HF_SUBLAYER_TO_GGML = {
    "self_attn.q_proj": "attention.wq",
    "self_attn.k_proj": "attention.wk",
    "self_attn.v_proj": "attention.wv",
    "self_attn.o_proj": "attention.wo",
    "mlp.gate_proj": "feed_forward.w1",
    "mlp.down_proj": "feed_forward.w2",
    "mlp.up_proj": "feed_forward.w3",
    "input_layernorm": "attention_norm",
    "post_attention_layernorm": "ffn_norm",
    # "norm": "norm",
    # "embed_tokens": "tok_embeddings",
    # "lm_head": "output",
}


def translate_tensor_name(t: str) -> str:
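    """Map a HuggingFace PEFT tensor name to its ggml equivalent.

    For example, a key such as
    "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight"
    (the exact prefix depends on the PEFT export) becomes
    "layers.0.attention.wq.weight.loraA". Unrecognized names abort the
    conversion.
    """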
    match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t)
    if match:
        nn = match.group(1)
        sub_layer = match.group(2)
        lora_type = match.group(3)

        sub_layer_renamed = HF_SUBLAYER_TO_GGML.get(sub_layer)
        if sub_layer_renamed is None:
            print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}")
            sys.exit(1)

        return f"layers.{nn}.{sub_layer_renamed}.weight.lora{lora_type}"
    else:
        print(f"Error: unrecognized tensor {t}")
        sys.exit(1)


def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
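    """Write the ggml LoRA ("ggla") file header.

    Layout: the magic "ggla" (written byte-reversed), a 32-bit file version,
    then the LoRA rank r and lora_alpha as 32-bit ints.
    """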
    fout.write(b"ggla"[::-1])  # magic (ggml lora)
    fout.write(struct.pack("i", 1))  # file version
    fout.write(struct.pack("ii", params["r"], params["lora_alpha"]))


def write_tensor_header(
    fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype
) -> None:
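    """Write one tensor record header.

    Layout: number of dimensions, name length and ftype as 32-bit ints,
    then the shape in reverse order, the UTF-8 name, and padding so that
    the tensor data which follows is 32-byte aligned.
    """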
    sname = name.encode("utf-8")
    fout.write(
        struct.pack(
            "iii",
            len(shape),
            len(sname),
            DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]],
        )
    )
    fout.write(struct.pack("i" * len(shape), *shape[::-1]))
    fout.write(sname)
    fout.seek((fout.tell() + 31) & -32)


if len(sys.argv) != 2:
    print(f"Usage: python {sys.argv[0]} <path>")
    print(
        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
    )
    sys.exit(1)

input_json = os.path.join(sys.argv[1], "adapter_config.json")
input_model = os.path.join(sys.argv[1], "adapter_model.bin")
output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")

model = torch.load(input_model, map_location="cpu")

with open(input_json, "r") as f:
    params = json.load(f)

if params["peft_type"] != "LORA":
    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
    sys.exit(1)

if params["fan_in_fan_out"]:
    print("Error: param fan_in_fan_out is not supported")
    sys.exit(1)

if params["bias"] is not None and params["bias"] != "none":
    print("Error: param bias is not supported")
    sys.exit(1)

# TODO: modules_to_save seems to list layers that were trained directly, without LoRA.
# It doesn't seem widely used, but should eventually be supported.
if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
    print("Error: param modules_to_save is not supported")
    sys.exit(1)

with open(output_path, "wb") as fout:
    fout.truncate()

    write_file_header(fout, params)
    for k, v in model.items():
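        # lora_A matrices are transposed before writing and keep fp16/fp32
        # (anything else is cast to fp32); all other tensors become float32.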
        if k.endswith("lora_A.weight"):
            if v.dtype != torch.float16 and v.dtype != torch.float32:
                v = v.float()
            v = v.T
        else:
            v = v.float()

        t = v.numpy()
        tname = translate_tensor_name(k)
        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
        write_tensor_header(fout, tname, t.shape, t.dtype)
        t.tofile(fout)

print(f"Converted {input_json} and {input_model} to {output_path}")