@@ -5,6 +5,7 @@
 import math
 import struct
 import sys
+from enum import IntEnum
 from pathlib import Path
 
 import numpy as np
@@ -34,10 +35,35 @@
     gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
 }
 
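+# The legacy ggml container formats, oldest to newest: bare GGML (no version
+# field, no vocab scores), GGMF (adds both), and GGJT (pads tensor data to
+# 32-byte alignment so files can be mmapped).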
+class GGMLFormat(IntEnum):
+    GGML = 0
+    GGMF = 1
+    GGJT = 2
+
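+# Mirrors llama.cpp's LLAMA_FTYPE enum; 5 and 6 are intentionally absent
+# because the Q4_2/Q4_3 quantizations they named were removed upstream.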
+class GGMLFType(IntEnum):
+    ALL_F32              = 0
+    MOSTLY_F16           = 1
+    MOSTLY_Q4_0          = 2
+    MOSTLY_Q4_1          = 3
+    MOSTLY_Q4_1_SOME_F16 = 4
+    MOSTLY_Q8_0          = 7
+    MOSTLY_Q5_0          = 8
+    MOSTLY_Q5_1          = 9
+    MOSTLY_Q2_K          = 10
+    MOSTLY_Q3_K_S        = 11
+    MOSTLY_Q3_K_M        = 12
+    MOSTLY_Q3_K_L        = 13
+    MOSTLY_Q4_K_S        = 14
+    MOSTLY_Q4_K_M        = 15
+    MOSTLY_Q5_K_S        = 16
+    MOSTLY_Q5_K_M        = 17
+    MOSTLY_Q6_K          = 18
+
 class Hyperparameters:
     def __init__(self):
-        self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.ftype = 0
-        self.n_ff = 0
+        self.n_vocab = self.n_embd = self.n_mult = self.n_head = 0
+        self.n_layer = self.n_rot = self.n_ff = 0
+        self.ftype = GGMLFType.ALL_F32
 
     def set_n_ff(self, model):
         ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
@@ -53,37 +79,46 @@ def load(self, data, offset):
             self.n_head,
             self.n_layer,
             self.n_rot,
-            self.ftype,
+            ftype,
         ) = struct.unpack('<7I', data[offset:offset + (4 * 7)])
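+        # Round-tripping the raw integer through the GGMLFType enum validates
+        # it, so unknown file types fail fast instead of mid-conversion.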
+        try:
+            self.ftype = GGMLFType(ftype)
+        except ValueError:
+            raise ValueError(f'Invalid ftype {ftype}')
         return 4 * 7
 
     def __str__(self):
-        return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype}>'
+        return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype.name}>'
 
 class Vocab:
-    def __init__(self):
+    def __init__(self, load_scores = True):
         self.items = []
+        self.load_scores = load_scores
 
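+    # Bare GGML vocabs store only the token bytes; per-token scores first
+    # appeared in GGMF, so they default to 0.0 when absent.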
     def load(self, data, offset, n_vocab):
         orig_offset = offset
         for _ in range(n_vocab):
             itemlen = struct.unpack('<I', data[offset:offset + 4])[0]
             assert itemlen < 4096, 'Absurd vocab item length'
             offset += 4
-            vocab = bytes(data[offset:offset + itemlen])
+            item_text = bytes(data[offset:offset + itemlen])
             offset += itemlen
-            score = struct.unpack('<f', data[offset:offset + 4])[0]
-            offset += 4
-            self.items.append((vocab, score))
+            if self.load_scores:
+                item_score = struct.unpack('<f', data[offset:offset + 4])[0]
+                offset += 4
+            else:
+                item_score = 0.0
+            self.items.append((item_text, item_score))
         return offset - orig_offset
 
 class Tensor:
-    def __init__(self):
+    def __init__(self, use_padding = True):
         self.name = None
         self.dims: tuple[int, ...] = ()
         self.dtype = None
         self.start_offset = 0
         self.len_bytes = np.int64(0)
+        self.use_padding = use_padding
 
     def load(self, data, offset):
         orig_offset = offset
@@ -99,7 +134,7 @@ def load(self, data, offset):
         offset += 4 * n_dims
         self.name = bytes(data[offset:offset + name_len])
         offset += name_len
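+        # ((offset + 31) & ~31) rounds up to the next 32-byte boundary: GGJT
+        # aligns tensor data for mmap; the older formats store it unpadded.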
-        pad = ((offset + 31) & ~31) - offset
+        pad = ((offset + 31) & ~31) - offset if self.use_padding else 0
         offset += pad
         n_elems = np.prod(self.dims)
         n_bytes = np.int64(np.int64(n_elems) * np.int64(tysize)) // np.int64(blksize)
@@ -109,28 +144,60 @@ def load(self, data, offset):
         # print(n_dims, name_len, dtype, self.dims, self.name, pad)
         return offset - orig_offset
 
-class GGMLV3Model:
+class GGMLModel:
     def __init__(self):
         self.hyperparameters = None
         self.vocab = None
         self.tensor_map = {}
         self.tensors = []
 
     def validate_header(self, data, offset):
-        if bytes(data[offset:offset + 4]) != b'tjgg' or struct.unpack('<I', data[offset + 4:offset + 8])[0] != 3:
-            raise ValueError('Only GGJTv3 supported')
-        return 8
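+        # The magics read reversed because each format's ASCII tag ('ggml',
+        # 'ggmf', 'ggjt') is written to the file as a little-endian uint32.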
+        magic = bytes(data[offset:offset + 4])
+        if magic == b'GGUF':
+            raise ValueError('File is already in GGUF format.')
+        if magic == b'lmgg':
+            self.file_format = GGMLFormat.GGML
+            self.format_version = 1
+            return 4
+        version = struct.unpack('<I', data[offset + 4:offset + 8])[0]
+        if magic == b'fmgg':
+            if version != 1:
+                raise ValueError(f'Cannot handle unexpected GGMF file version {version}')
+            self.file_format = GGMLFormat.GGMF
+            self.format_version = version
+            return 8
+        if magic == b'tjgg':
+            if version < 1 or version > 3:
+                raise ValueError(f'Cannot handle unexpected GGJT file version {version}')
+            self.file_format = GGMLFormat.GGJT
+            self.format_version = version
+            return 8
+        raise ValueError(f"Unexpected file magic {magic!r}! This doesn't look like a GGML format file.")
+
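+    # The quantized tensor layouts changed incompatibly in GGJTv2 and again
+    # in GGJTv3, so quantized files from older revisions cannot be converted.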
+    def validate_conversion(self, ftype):
+        err = ''
+        if (self.file_format < GGMLFormat.GGJT or self.format_version < 2):
+            if ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16):
+                err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
+        elif (self.file_format == GGMLFormat.GGJT and self.format_version == 2):
+            if ftype in (GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
+                         GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
+                err = 'Q4 and Q8 quantizations changed in GGJTv3.'
+        if len(err) > 0:
+            raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
 
     def load(self, data, offset):
         offset += self.validate_header(data, offset)
         hp = Hyperparameters()
         offset += hp.load(data, offset)
-        vocab = Vocab()
+        print(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}')
+        self.validate_conversion(hp.ftype)
+        vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML)
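+        # GGMLFormat is ordered chronologically, so these IntEnum comparisons
+        # enable exactly the features (scores, padding) each format added.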
         offset += vocab.load(data, offset, hp.n_vocab)
         tensors: list[Tensor] = []
         tensor_map = {}
         while offset < len(data):
-            tensor = Tensor()
+            tensor = Tensor(use_padding = self.file_format > GGMLFormat.GGMF)
             offset += tensor.load(data, offset)
             tensor_map[tensor.name] = len(tensors)
             tensors.append(tensor)
@@ -168,7 +235,10 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
 
     def save(self):
         print('* Preparing to save GGUF file')
-        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+        gguf_writer = gguf.GGUFWriter(
+            self.cfg.output,
+            gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
+            use_temp_file = False)
         self.add_params(gguf_writer)
         self.add_vocab(gguf_writer)
         if self.special_vocab is not None:
@@ -185,7 +255,10 @@ def save(self):
     def add_params(self, gguf_writer):
         hp = self.model.hyperparameters
         cfg = self.cfg
-        desc = cfg.desc if cfg.desc is not None else 'converted from legacy GGJTv3 format'
+        if cfg.desc is not None:
+            desc = cfg.desc
+        else:
+            desc = f'converted from legacy {self.model.file_format.name}v{self.model.format_version} {hp.ftype.name} format'
         try:
             # Filenames aren't necessarily valid UTF8.
             name = cfg.name if cfg.name is not None else cfg.input.name
@@ -195,6 +268,7 @@ def add_params(self, gguf_writer):
         if name is not None:
             gguf_writer.add_name(name)
         gguf_writer.add_description(desc)
+        gguf_writer.add_file_type(int(hp.ftype))
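+        # Presumably recorded as general.file_type in the GGUF metadata, so
+        # loaders can report the quantization without scanning tensors.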
         if self.params_override is not None:
             po = self.params_override
             assert po.n_embd == hp.n_embd, 'Model hyperparams mismatch'
@@ -231,7 +305,8 @@ def add_vocab(self, gguf_writer):
             tokens.append(vbytes)
             scores.append(score)
             toktypes.append(ttype)
-        assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
+        assert len(tokens) == hp.n_vocab, \
+            f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
         gguf_writer.add_token_list(tokens)
         gguf_writer.add_token_scores(scores)
         if len(toktypes) > 0:
@@ -283,7 +358,11 @@ def add_tensors(self, gguf_writer):
                 tempdims[1] = tempdims[0]
                 tempdims[0] = temp
             # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
-            gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
+            gguf_writer.add_tensor(
+                mapped_name,
+                data[tensor.start_offset:tensor.start_offset + tensor.len_bytes],
+                raw_shape = tempdims,
+                raw_dtype = tensor.dtype)
 
 def handle_metadata(cfg, hp):
     import convert
@@ -305,32 +384,46 @@ def handle_metadata(cfg, hp):
         params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
     else:
         raise ValueError('Unable to load metadata')
-    vocab = convert.load_vocab(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir, cfg.vocabtype)
+    vocab = convert.load_vocab(
+        cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir,
+        cfg.vocabtype)
     # FIXME: Respect cfg.vocab_dir?
     svocab = gguf.SpecialVocab(cfg.model_metadata_dir)
     convert.check_vocab_size(params, vocab)
     return (params, vocab, svocab)
 
 def handle_args():
-    parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
-    parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
-    parser.add_argument('--output', '-o', type = Path, required = True, help = 'Output GGUF filename')
-    parser.add_argument('--name', help = 'Set model name')
-    parser.add_argument('--desc', help = 'Set model description')
-    parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
-    parser.add_argument('--eps', default = '5.0e-06', help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
-    parser.add_argument('--context-length', '-c', type = int, default = 2048, help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
-    parser.add_argument('--model-metadata-dir', '-m', type = Path, help = 'Load HuggingFace/.pth vocab and metadata from the specified directory')
-    parser.add_argument("--vocab-dir", type = Path, help = "directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
-    parser.add_argument("--vocabtype", choices = ["spm", "bpe"], help = "vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)", default = "spm")
+    parser = argparse.ArgumentParser(description = 'Convert GGML models to GGUF')
+    parser.add_argument('--input', '-i', type = Path, required = True,
+                        help = 'Input GGMLv3 filename')
+    parser.add_argument('--output', '-o', type = Path, required = True,
+                        help = 'Output GGUF filename')
+    parser.add_argument('--name',
+                        help = 'Set model name')
+    parser.add_argument('--desc',
+                        help = 'Set model description')
+    parser.add_argument('--gqa', type = int, default = 1,
+                        help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
+    parser.add_argument('--eps', default = '5.0e-06',
+                        help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
+    parser.add_argument('--context-length', '-c', type = int, default = 2048,
+                        help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
+    parser.add_argument('--model-metadata-dir', '-m', type = Path,
+                        help = 'Load HuggingFace/.pth vocab and metadata from the specified directory')
+    parser.add_argument("--vocab-dir", type = Path,
+                        help = "directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
+    parser.add_argument("--vocabtype", choices = ["spm", "bpe"], default = "spm",
+                        help = "vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
     return parser.parse_args()
 
 def main():
     cfg = handle_args()
     print(f'* Using config: {cfg}')
     print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
+    if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'):
+        print('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
     data = np.memmap(cfg.input, mode = 'r')
-    model = GGMLV3Model()
+    model = GGMLModel()
     print('* Scanning GGML input file')
     offset = model.load(data, 0)
     print(f'* GGML model hyperparameters: {model.hyperparameters}')
@@ -345,7 +438,12 @@ def main():
         print(f'* Special vocab: {special_vocab}')
     else:
         print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
-    converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override, special_vocab = special_vocab)
+    if model.file_format == GGMLFormat.GGML:
+        print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
+    converter = GGMLToGGUF(model, data, cfg,
+                           params_override = params_override,
+                           vocab_override = vocab_override,
+                           special_vocab = special_vocab)
     converter.save()
     print(f'* Successful completion. Output saved to: {cfg.output}')