1
1
#!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
2
4
3
from __future__ import annotations
5
4
@@ -47,12 +46,11 @@ class Model:
47
46
_model_classes : dict [str , type [Model ]] = {}
48
47
49
48
dir_model : Path
50
- ftype : gguf . LlamaFileType
49
+ ftype : int
51
50
is_big_endian : bool
52
51
endianess : gguf .GGUFEndian
53
52
use_temp_file : bool
54
53
lazy : bool
55
- model_name : str | None
56
54
part_names : list [str ]
57
55
is_safetensors : bool
58
56
hparams : dict [str , Any ]
@@ -65,7 +63,7 @@ class Model:
65
63
# subclasses should define this!
66
64
model_arch : gguf .MODEL_ARCH
67
65
68
- def __init__ (self , dir_model : Path , ftype : gguf .LlamaFileType , fname_out : Path , is_big_endian : bool , use_temp_file : bool , eager : bool , model_name : str | None ):
66
+ def __init__ (self , dir_model : Path , ftype : gguf .LlamaFileType , fname_out : Path , is_big_endian : bool , use_temp_file : bool , eager : bool ):
69
67
if type (self ) is Model :
70
68
raise TypeError (f"{ type (self ).__name__ !r} should not be directly instantiated" )
71
69
self .dir_model = dir_model
@@ -74,11 +72,10 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
74
72
self .endianess = gguf .GGUFEndian .BIG if is_big_endian else gguf .GGUFEndian .LITTLE
75
73
self .use_temp_file = use_temp_file
76
74
self .lazy = not eager
77
- self .model_name = model_name
78
- self .part_names = Model .get_model_part_names (self .dir_model , "model" , ".safetensors" )
75
+ self .part_names = Model .get_model_part_names (self .dir_model , ".safetensors" )
79
76
self .is_safetensors = len (self .part_names ) > 0
80
77
if not self .is_safetensors :
81
- self .part_names = Model .get_model_part_names (self .dir_model , "pytorch_model" , " .bin" )
78
+ self .part_names = Model .get_model_part_names (self .dir_model , ".bin" )
82
79
self .hparams = Model .load_hparams (self .dir_model )
83
80
self .block_count = self .find_hparam (["n_layers" , "num_hidden_layers" , "n_layer" ])
84
81
self .tensor_map = gguf .get_tensor_name_map (self .model_arch , self .block_count )
@@ -96,7 +93,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
96
93
ftype_lw : str = ftype_up .lower ()
97
94
# allow templating the file name with the output ftype, useful with the "auto" ftype
98
95
self .fname_out = fname_out .parent / fname_out .name .format (ftype_lw , outtype = ftype_lw , ftype = ftype_lw , OUTTYPE = ftype_up , FTYPE = ftype_up )
99
- self .gguf_writer = gguf .GGUFWriter (path = None , arch = gguf .MODEL_ARCH_NAMES [self .model_arch ], endianess = self .endianess , use_temp_file = self .use_temp_file )
96
+ self .gguf_writer = gguf .GGUFWriter (self . fname_out , gguf .MODEL_ARCH_NAMES [self .model_arch ], endianess = self .endianess , use_temp_file = self .use_temp_file )
100
97
101
98
@classmethod
102
99
def __init_subclass__ (cls ):
@@ -140,7 +137,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
140
137
from safetensors import safe_open
141
138
ctx = cast (ContextManager [Any ], safe_open (self .dir_model / part_name , framework = "pt" , device = "cpu" ))
142
139
else :
143
- ctx = contextlib .nullcontext (torch .load (str (self .dir_model / part_name ), map_location = "cpu" , mmap = True , weights_only = True ))
140
+ ctx = contextlib .nullcontext (torch .load (str (self .dir_model / part_name ), map_location = "cpu" , mmap = False , weights_only = True ))
144
141
145
142
with ctx as model_part :
146
143
tensor_names_from_parts .update (model_part .keys ())
@@ -177,14 +174,14 @@ def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int |
177
174
return False
178
175
return name == (key_name + suffix )
179
176
180
- def map_tensor_name (self , name : str , try_suffixes : Sequence [str ] = (".weight" , ".bias" )) -> str :
177
+ def map_tensor_name (self , name : str , try_suffixes : Sequence [str ] = (".weight" , ".bias" , ".beta" , ".gamma" )) -> str :
181
178
new_name = self .tensor_map .get_name (key = name , try_suffixes = try_suffixes )
182
179
if new_name is None :
183
180
raise ValueError (f"Can not map tensor { name !r} " )
184
181
return new_name
185
182
186
183
def set_gguf_parameters (self ):
187
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
184
+ self .gguf_writer .add_name (self .dir_model .name )
188
185
self .gguf_writer .add_block_count (self .block_count )
189
186
190
187
if (n_ctx := self .find_hparam (["max_position_embeddings" , "n_ctx" ], optional = True )) is not None :
@@ -248,6 +245,9 @@ def write_tensors(self):
248
245
# we don't need these
249
246
if name .endswith ((".attention.masked_bias" , ".attention.bias" , ".rotary_emb.inv_freq" )):
250
247
continue
248
+
249
+ if name .startswith ("bert." ):
250
+ name = name .removeprefix ("bert." )
251
251
252
252
old_dtype = data_torch .dtype
253
253
@@ -261,7 +261,7 @@ def write_tensors(self):
261
261
if part .isdecimal ():
262
262
bid = int (part )
263
263
break
264
-
264
+
265
265
for new_name , data in ((n , d .squeeze ().numpy ()) for n , d in self .modify_tensors (data_torch , name , bid )):
266
266
data : np .ndarray = data # type hint
267
267
n_dims = len (data .shape )
@@ -326,21 +326,21 @@ def write_tensors(self):
326
326
327
327
def write (self ):
328
328
self .write_tensors ()
329
- self .gguf_writer .write_header_to_file (self . fname_out )
329
+ self .gguf_writer .write_header_to_file ()
330
330
self .gguf_writer .write_kv_data_to_file ()
331
331
self .gguf_writer .write_tensors_to_file (progress = True )
332
332
self .gguf_writer .close ()
333
333
334
334
def write_vocab (self ):
335
- self .gguf_writer .write_header_to_file (self . fname_out )
335
+ self .gguf_writer .write_header_to_file ()
336
336
self .gguf_writer .write_kv_data_to_file ()
337
337
self .gguf_writer .close ()
338
338
339
339
@staticmethod
340
- def get_model_part_names (dir_model : Path , prefix : str , suffix : str ) -> list [str ]:
340
+ def get_model_part_names (dir_model : Path , suffix : str ) -> list [str ]:
341
341
part_names : list [str ] = []
342
342
for filename in os .listdir (dir_model ):
343
- if filename .startswith ( prefix ) and filename . endswith (suffix ):
343
+ if filename .endswith (suffix ):
344
344
part_names .append (filename )
345
345
346
346
part_names .sort ()
@@ -423,9 +423,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
423
423
# NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
424
424
# or pull the latest version of the model from Huggingface
425
425
# don't edit the hashes manually!
426
- if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5" :
427
- # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
428
- res = "llama-bpe"
429
426
if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754" :
430
427
# ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
431
428
res = "deepseek-llm"
@@ -435,6 +432,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
435
432
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed" :
436
433
# ref: https://huggingface.co/tiiuae/falcon-7b
437
434
res = "falcon"
435
+ if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f" :
436
+ # ref: https://huggingface.co/google-bert/bert-base-uncased
437
+ res = "bert"
438
438
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f" :
439
439
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
440
440
res = "bert-bge"
@@ -453,18 +453,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
453
453
if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff" :
454
454
# ref: https://huggingface.co/smallcloudai/Refact-1_6-base
455
455
res = "refact"
456
- if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8" :
457
- # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
458
- res = "command-r"
459
456
if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea" :
460
457
# ref: https://huggingface.co/Qwen/Qwen1.5-7B
461
458
res = "qwen2"
462
459
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166" :
463
460
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
464
461
res = "olmo"
465
- if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e" :
466
- # ref: https://huggingface.co/databricks/dbrx-base
467
- res = "dbrx"
468
462
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f" :
469
463
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
470
464
res = "jina-v2-en"
@@ -477,9 +471,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
477
471
if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d" :
478
472
# ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
479
473
res = "smaug-bpe"
480
- if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a" :
481
- # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
482
- res = "jina-v2-code"
483
474
484
475
if res is None :
485
476
logger .warning ("\n " )
@@ -667,7 +658,7 @@ class GPTNeoXModel(Model):
667
658
def set_gguf_parameters (self ):
668
659
block_count = self .hparams ["num_hidden_layers" ]
669
660
670
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
661
+ self .gguf_writer .add_name (self .dir_model .name )
671
662
self .gguf_writer .add_context_length (self .hparams ["max_position_embeddings" ])
672
663
self .gguf_writer .add_embedding_length (self .hparams ["hidden_size" ])
673
664
self .gguf_writer .add_block_count (block_count )
@@ -800,7 +791,7 @@ def set_vocab(self):
800
791
801
792
def set_gguf_parameters (self ):
802
793
block_count = self .hparams ["n_layers" ]
803
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
794
+ self .gguf_writer .add_name (self .dir_model .name )
804
795
self .gguf_writer .add_context_length (self .hparams ["max_seq_len" ])
805
796
self .gguf_writer .add_embedding_length (self .hparams ["d_model" ])
806
797
self .gguf_writer .add_block_count (block_count )
@@ -852,7 +843,7 @@ def set_gguf_parameters(self):
852
843
raise ValueError ("gguf: can not find ctx length parameter." )
853
844
854
845
self .gguf_writer .add_file_type (self .ftype )
855
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
846
+ self .gguf_writer .add_name (self .dir_model .name )
856
847
self .gguf_writer .add_source_hf_repo (hf_repo )
857
848
self .gguf_writer .add_tensor_data_layout ("Meta AI original pth" )
858
849
self .gguf_writer .add_context_length (ctx_length )
@@ -889,7 +880,7 @@ def set_gguf_parameters(self):
889
880
else :
890
881
raise ValueError ("gguf: can not find ctx length parameter." )
891
882
892
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
883
+ self .gguf_writer .add_name (self .dir_model .name )
893
884
self .gguf_writer .add_source_hf_repo (hf_repo )
894
885
self .gguf_writer .add_tensor_data_layout ("Meta AI original pth" )
895
886
self .gguf_writer .add_context_length (ctx_length )
@@ -1012,7 +1003,7 @@ def set_gguf_parameters(self):
1012
1003
else :
1013
1004
raise ValueError ("gguf: can not find ctx length parameter." )
1014
1005
1015
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
1006
+ self .gguf_writer .add_name (self .dir_model .name )
1016
1007
self .gguf_writer .add_source_hf_repo (hf_repo )
1017
1008
self .gguf_writer .add_tensor_data_layout ("Meta AI original pth" )
1018
1009
self .gguf_writer .add_context_length (ctx_length )
@@ -1208,7 +1199,7 @@ def set_gguf_parameters(self):
1208
1199
hparams = self .hparams
1209
1200
block_count = hparams ["num_hidden_layers" ]
1210
1201
1211
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
1202
+ self .gguf_writer .add_name (self .dir_model .name )
1212
1203
self .gguf_writer .add_context_length (hparams ["max_position_embeddings" ])
1213
1204
self .gguf_writer .add_embedding_length (hparams ["hidden_size" ])
1214
1205
self .gguf_writer .add_block_count (block_count )
@@ -1683,7 +1674,7 @@ class GPT2Model(Model):
1683
1674
model_arch = gguf .MODEL_ARCH .GPT2
1684
1675
1685
1676
def set_gguf_parameters (self ):
1686
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
1677
+ self .gguf_writer .add_name (self .dir_model .name )
1687
1678
self .gguf_writer .add_block_count (self .hparams ["n_layer" ])
1688
1679
self .gguf_writer .add_context_length (self .hparams ["n_ctx" ])
1689
1680
self .gguf_writer .add_embedding_length (self .hparams ["n_embd" ])
@@ -2193,7 +2184,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
2193
2184
del bid # unused
2194
2185
2195
2186
# we are only using BERT for embeddings so we don't need the pooling layer
2196
- if name in ("embeddings.position_ids" , "pooler.dense.weight" , "pooler.dense.bias" ):
2187
+ if name in ("embeddings.position_ids" , "pooler.dense.weight" , "pooler.dense.bias" ) or "cls." in name :
2197
2188
return [] # we don't need these
2198
2189
2199
2190
return [(self .map_tensor_name (name ), data_torch )]
@@ -2250,7 +2241,7 @@ def set_gguf_parameters(self):
2250
2241
hparams = self .hparams
2251
2242
block_count = hparams ["num_hidden_layers" ]
2252
2243
2253
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
2244
+ self .gguf_writer .add_name (self .dir_model .name )
2254
2245
self .gguf_writer .add_context_length (hparams ["max_position_embeddings" ])
2255
2246
self .gguf_writer .add_embedding_length (hparams ["hidden_size" ])
2256
2247
self .gguf_writer .add_block_count (block_count )
@@ -2350,7 +2341,7 @@ def set_gguf_parameters(self):
2350
2341
# Fail early for models which don't have a block expansion factor of 2
2351
2342
assert d_inner == 2 * d_model
2352
2343
2353
- self .gguf_writer .add_name (self .dir_model .name if self . model_name is None else self . model_name )
2344
+ self .gguf_writer .add_name (self .dir_model .name )
2354
2345
self .gguf_writer .add_context_length (2 ** 20 ) # arbitrary value; for those who use the default
2355
2346
self .gguf_writer .add_embedding_length (d_model )
2356
2347
self .gguf_writer .add_feed_forward_length (0 ) # unused, but seemingly required when loading
@@ -2457,13 +2448,11 @@ def __init__(self, *args, **kwargs):
2457
2448
2458
2449
def get_tensors (self ):
2459
2450
for name , data in super ().get_tensors ():
2460
- if 'gated_layer ' in name :
2451
+ if 'gated_layers ' in name :
2461
2452
d1 = data [:self .intermediate_size , :]
2462
2453
name1 = name .replace ('gated_layers' , 'gated_layers_w' )
2463
- name1 = name1 .replace ('up_gated_layer' , 'gated_layers_v' )
2464
2454
d2 = data [self .intermediate_size :, :]
2465
2455
name2 = name .replace ('gated_layers' , 'gated_layers_v' )
2466
- name2 = name2 .replace ('up_gated_layer' , 'gated_layers_w' )
2467
2456
yield name1 , d1
2468
2457
yield name2 , d2
2469
2458
continue
@@ -2848,13 +2837,8 @@ def main() -> None:
2848
2837
hparams = Model .load_hparams (dir_model )
2849
2838
2850
2839
with torch .inference_mode ():
2851
- try :
2852
- model_class = Model .from_model_architecture (hparams ["architectures" ][0 ])
2853
- except NotImplementedError :
2854
- logger .error (f"Model { hparams ['architectures' ][0 ]} is not supported" )
2855
- sys .exit (1 )
2856
-
2857
- model_instance = model_class (dir_model , ftype_map [args .outtype ], fname_out , args .bigendian , args .use_temp_file , args .no_lazy , args .model_name )
2840
+ model_class = Model .from_model_architecture (hparams ["architectures" ][0 ])
2841
+ model_instance = model_class (dir_model , ftype_map [args .outtype ], fname_out , args .bigendian , args .use_temp_file , args .no_lazy )
2858
2842
2859
2843
logger .info ("Set model parameters" )
2860
2844
model_instance .set_gguf_parameters ()
@@ -2876,3 +2860,4 @@ def main() -> None:
2876
2860
2877
2861
if __name__ == '__main__' :
2878
2862
main ()
2863
+
0 commit comments