@@ -33,12 +33,23 @@
 
 # output in the same directory as the model
 dir_model = sys.argv[1]
-fname_out = sys.argv[1] + "/ggml-model.bin"
 
 fname_hparams = sys.argv[1] + "/params.json"
-fname_model = sys.argv[1] + "/consolidated.00.pth"
 fname_tokenizer = sys.argv[1] + "/../tokenizer.model"
 
+def get_n_parts(dim):
+    if dim == 4096:
+        return 1
+    elif dim == 5120:
+        return 2
+    elif dim == 6656:
+        return 4
+    elif dim == 8192:
+        return 8
+    else:
+        print("Invalid dim: " + str(dim))
+        sys.exit(1)
+
 # possible data types
 # ftype == 0 -> float32
 # ftype == 1 -> float16
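Aside on the hunk above: the hard-coded dims correspond to how the LLaMA checkpoints are shipped, with 7B (dim 4096) in a single consolidated.00.pth and 13B/30B/65B split into 2, 4 and 8 parts. A table-driven lookup would be an equivalent, slightly shorter way to express the same mapping; this is only a sketch, not what the patch itself uses:

    import sys

    # model dim -> number of consolidated.*.pth shards
    # (same values as the get_n_parts() added in the diff above)
    N_PARTS = {4096: 1, 5120: 2, 6656: 4, 8192: 8}

    def get_n_parts(dim):
        try:
            return N_PARTS[dim]
        except KeyError:
            sys.exit("Invalid dim: " + str(dim))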
@@ -61,76 +72,91 @@
 
 hparams.update({"vocab_size": tokenizer.vocab_size()})
 
+n_parts = get_n_parts(hparams["dim"])
+
 print(hparams)
+print('n_parts = ', n_parts)
 
-model = torch.load(fname_model, map_location="cpu")
-
-fout = open(fname_out, "wb")
-
-fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
-fout.write(struct.pack("i", hparams["vocab_size"]))
-fout.write(struct.pack("i", hparams["dim"]))
-fout.write(struct.pack("i", hparams["multiple_of"]))
-fout.write(struct.pack("i", hparams["n_heads"]))
-fout.write(struct.pack("i", hparams["n_layers"]))
-fout.write(struct.pack("i", hparams["dim"] // hparams["n_heads"])) # rot (obsolete)
-fout.write(struct.pack("i", ftype))
-
-# Is this correct??
-for i in range(32000):
-    # TODO: this is probably wrong - not sure how this tokenizer works
-    text = tokenizer.decode([29889, i]).encode('utf-8')
-    # remove the first byte (it's always '.')
-    text = text[1:]
-    fout.write(struct.pack("i", len(text)))
-    fout.write(text)
-
-for k, v in model.items():
-    name = k
-    shape = v.shape
-
-    # skip layers.X.attention.inner_attention.rope.freqs
-    if name[-5:] == "freqs":
-        continue
-
-    print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
-
-    #data = tf.train.load_variable(dir_model, name).squeeze()
-    data = v.numpy().squeeze()
-    n_dims = len(data.shape);
-
-    # for efficiency - transpose some matrices
-    # "model/h.*/attn/c_attn/w"
-    # "model/h.*/attn/c_proj/w"
-    # "model/h.*/mlp/c_fc/w"
-    # "model/h.*/mlp/c_proj/w"
-    #if name[-14:] == "/attn/c_attn/w" or \
-    #   name[-14:] == "/attn/c_proj/w" or \
-    #   name[-11:] == "/mlp/c_fc/w" or \
-    #   name[-13:] == "/mlp/c_proj/w":
-    #    print("  Transposing")
-    #    data = data.transpose()
-
-    dshape = data.shape
-
-    # default type is fp16
-    ftype_cur = 1
-    if ftype == 0 or n_dims == 1:
-        print("  Converting to float32")
-        data = data.astype(np.float32)
-        ftype_cur = 0
-
-    # header
-    str = name.encode('utf-8')
-    fout.write(struct.pack("iii", n_dims, len(str), ftype_cur))
-    for i in range(n_dims):
-        fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
-    fout.write(str);
-
-    # data
-    data.tofile(fout)
-
-fout.close()
-
-print("Done. Output file: " + fname_out)
-print("")
+for p in range(n_parts):
+    print('Processing part ', p)
+
+    #fname_model = sys.argv[1] + "/consolidated.00.pth"
+    fname_model = sys.argv[1] + "/consolidated.0" + str(p) + ".pth"
+    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
+    if (p > 0):
+        fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" + "." + str(p)
+
+    model = torch.load(fname_model, map_location="cpu")
+
+    fout = open(fname_out, "wb")
+
+    fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
+    fout.write(struct.pack("i", hparams["vocab_size"]))
+    fout.write(struct.pack("i", hparams["dim"]))
+    fout.write(struct.pack("i", hparams["multiple_of"]))
+    fout.write(struct.pack("i", hparams["n_heads"]))
+    fout.write(struct.pack("i", hparams["n_layers"]))
+    fout.write(struct.pack("i", hparams["dim"] // hparams["n_heads"])) # rot (obsolete)
+    fout.write(struct.pack("i", ftype))
+
+    # Is this correct??
+    for i in range(32000):
+        # TODO: this is probably wrong - not sure how this tokenizer works
+        text = tokenizer.decode([29889, i]).encode('utf-8')
+        # remove the first byte (it's always '.')
+        text = text[1:]
+        fout.write(struct.pack("i", len(text)))
+        fout.write(text)
+
+    for k, v in model.items():
+        name = k
+        shape = v.shape
+
+        # skip layers.X.attention.inner_attention.rope.freqs
+        if name[-5:] == "freqs":
+            continue
+
+        print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
+
+        #data = tf.train.load_variable(dir_model, name).squeeze()
+        data = v.numpy().squeeze()
+        n_dims = len(data.shape);
+
+        # for efficiency - transpose some matrices
+        # "model/h.*/attn/c_attn/w"
+        # "model/h.*/attn/c_proj/w"
+        # "model/h.*/mlp/c_fc/w"
+        # "model/h.*/mlp/c_proj/w"
+        #if name[-14:] == "/attn/c_attn/w" or \
+        #   name[-14:] == "/attn/c_proj/w" or \
+        #   name[-11:] == "/mlp/c_fc/w" or \
+        #   name[-13:] == "/mlp/c_proj/w":
+        #    print("  Transposing")
+        #    data = data.transpose()
+
+        dshape = data.shape
+
+        # default type is fp16
+        ftype_cur = 1
+        if ftype == 0 or n_dims == 1:
+            print("  Converting to float32")
+            data = data.astype(np.float32)
+            ftype_cur = 0
+
+        # header
+        sname = name.encode('utf-8')
+        fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
+        for i in range(n_dims):
+            fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
+        fout.write(sname);
+
+        # data
+        data.tofile(fout)
+
+    # I hope this deallocates the memory ..
+    model = None
+
+    fout.close()
+
+    print("Done. Output file: " + fname_out + ", (part ", p, ")")
+    print("")
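A quick way to sanity-check a converted file is to read the fixed-size header back and compare it against params.json. The sketch below only assumes the write order used in the loop above (magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype as eight native-order int32 values, which this patch writes at the start of every part); the path argument is whatever ggml-model-*.bin the converter produced:

    import struct, sys

    def read_ggml_header(path):
        # eight int32 fields, in the same order the converter writes them
        with open(path, "rb") as f:
            magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype = \
                struct.unpack("8i", f.read(8 * 4))
        assert magic == 0x67676d6c, "not a ggml model file"
        return {"vocab_size": vocab_size, "dim": dim, "multiple_of": multiple_of,
                "n_heads": n_heads, "n_layers": n_layers, "rot": rot, "ftype": ftype}

    if __name__ == "__main__":
        print(read_ggml_header(sys.argv[1]))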