Commit 42c90d2

fix issue ggml-org#7924
1 parent 172c825 commit 42c90d2

File tree

1 file changed: +33 -48 lines changed


convert-hf-to-gguf.py

Lines changed: 33 additions & 48 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 from __future__ import annotations
 
@@ -47,12 +46,11 @@ class Model:
     _model_classes: dict[str, type[Model]] = {}
 
     dir_model: Path
-    ftype: gguf.LlamaFileType
+    ftype: int
     is_big_endian: bool
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
-    model_name: str | None
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -65,7 +63,7 @@ class Model:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
@@ -74,11 +72,10 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager
-        self.model_name = model_name
-        self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
+        self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors")
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
-            self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
+            self.part_names = Model.get_model_part_names(self.dir_model, ".bin")
         self.hparams = Model.load_hparams(self.dir_model)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -96,7 +93,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
             ftype_lw: str = ftype_up.lower()
             # allow templating the file name with the output ftype, useful with the "auto" ftype
             self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
-        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
+        self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
 
     @classmethod
     def __init_subclass__(cls):
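
The file-name templating kept as context in this hunk is plain str.format; a minimal sketch of how an "auto"-typed output name resolves (the template name here is hypothetical):

    # hypothetical output template; 'auto' resolves to a concrete ftype such as f16
    name = "ggml-model-{ftype}.gguf"
    name.format("f16", outtype="f16", ftype="f16", OUTTYPE="F16", FTYPE="F16")
    # -> 'ggml-model-f16.gguf'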
@@ -140,7 +137,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 from safetensors import safe_open
                 ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
             else:
-                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
+                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=False, weights_only=True))
 
             with ctx as model_part:
                 tensor_names_from_parts.update(model_part.keys())
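
The reverted mmap flag changes how the PyTorch checkpoint is read: with mmap=False, torch.load deserializes the whole file into memory up front, while mmap=True would map tensor storage from disk on demand (and requires a checkpoint saved in the newer zipfile format). A minimal sketch, with a hypothetical file name:

    import torch

    # eager read: the full checkpoint is materialized in RAM
    state = torch.load("pytorch_model.bin", map_location="cpu", mmap=False, weights_only=True)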
@@ -177,14 +174,14 @@ def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int |
                 return False
         return name == (key_name + suffix)
 
-    def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
+    def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias", ".beta", ".gamma")) -> str:
         new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes)
         if new_name is None:
             raise ValueError(f"Can not map tensor {name!r}")
         return new_name
 
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.block_count)
 
         if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
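
.beta and .gamma are the legacy LayerNorm parameter names used by older (TensorFlow-era) BERT checkpoints in place of .bias and .weight. The tensor-map lookup presumably strips a known suffix, resolves the base name, and re-attaches the suffix; a rough sketch of that mechanism, with a hypothetical one-entry map:

    # hypothetical illustration of suffix-based lookup
    mapping = {"embeddings.LayerNorm": "token_embd_norm"}

    def get_name(key: str, try_suffixes=(".weight", ".bias", ".beta", ".gamma")):
        for suffix in try_suffixes:
            if key.endswith(suffix):
                base = mapping.get(key[: -len(suffix)])
                if base is not None:
                    return base + suffix
        return None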
@@ -248,6 +245,9 @@ def write_tensors(self):
             # we don't need these
             if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
                 continue
+
+            if name.startswith("bert."):
+                name = name.removeprefix("bert.")
 
             old_dtype = data_torch.dtype
 
@@ -261,7 +261,7 @@ def write_tensors(self):
                 if part.isdecimal():
                     bid = int(part)
                     break
-
+
             for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
                 data: np.ndarray = data  # type hint
                 n_dims = len(data.shape)
@@ -326,21 +326,21 @@ def write_tensors(self):
 
     def write(self):
         self.write_tensors()
-        self.gguf_writer.write_header_to_file(self.fname_out)
+        self.gguf_writer.write_header_to_file()
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.write_tensors_to_file(progress=True)
         self.gguf_writer.close()
 
     def write_vocab(self):
-        self.gguf_writer.write_header_to_file(self.fname_out)
+        self.gguf_writer.write_header_to_file()
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()
 
     @staticmethod
-    def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
+    def get_model_part_names(dir_model: Path, suffix: str) -> list[str]:
         part_names: list[str] = []
         for filename in os.listdir(dir_model):
-            if filename.startswith(prefix) and filename.endswith(suffix):
+            if filename.endswith(suffix):
                 part_names.append(filename)
 
         part_names.sort()
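
With the prefix parameter gone, any file carrying the right extension is treated as a model part, not only model* / pytorch_model* shards; a small sketch of the resulting behavior (directory contents hypothetical):

    from pathlib import Path

    # hypothetical directory holding two shards plus an unrelated file
    # dir/: model-00001-of-00002.safetensors, model-00002-of-00002.safetensors, notes.txt
    Model.get_model_part_names(Path("dir"), ".safetensors")
    # -> ['model-00001-of-00002.safetensors', 'model-00002-of-00002.safetensors'] (sorted)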
@@ -423,9 +423,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
-        if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
-            # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
-            res = "llama-bpe"
         if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
             # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
             res = "deepseek-llm"
@@ -435,6 +432,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
             # ref: https://huggingface.co/tiiuae/falcon-7b
             res = "falcon"
+        if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
+            # ref: https://huggingface.co/google-bert/bert-base-uncased
+            res = "bert"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
@@ -453,18 +453,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
             # ref: https://huggingface.co/smallcloudai/Refact-1_6-base
             res = "refact"
-        if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
-            # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
-            res = "command-r"
         if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
             # ref: https://huggingface.co/Qwen/Qwen1.5-7B
             res = "qwen2"
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
             res = "olmo"
-        if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
-            # ref: https://huggingface.co/databricks/dbrx-base
-            res = "dbrx"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
             res = "jina-v2-en"
@@ -477,9 +471,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
             # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
             res = "smaug-bpe"
-        if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
-            # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
-            res = "jina-v2-code"
 
         if res is None:
             logger.warning("\n")
@@ -667,7 +658,7 @@ class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -800,7 +791,7 @@ def set_vocab(self):
 
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_block_count(block_count)
@@ -852,7 +843,7 @@ def set_gguf_parameters(self):
             raise ValueError("gguf: can not find ctx length parameter.")
 
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -889,7 +880,7 @@ def set_gguf_parameters(self):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")
 
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1012,7 +1003,7 @@ def set_gguf_parameters(self):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")
 
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1208,7 +1199,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1683,7 +1674,7 @@ class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2
 
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_context_length(self.hparams["n_ctx"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -2193,7 +2184,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         del bid  # unused
 
         # we are only using BERT for embeddings so we don't need the pooling layer
-        if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"):
+        if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." in name:
             return []  # we don't need these
 
         return [(self.map_tensor_name(name), data_torch)]
@@ -2250,7 +2241,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -2350,7 +2341,7 @@ def set_gguf_parameters(self):
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model
 
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(2**20)  # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
@@ -2457,13 +2448,11 @@ def __init__(self, *args, **kwargs):
 
     def get_tensors(self):
         for name, data in super().get_tensors():
-            if 'gated_layer' in name:
+            if 'gated_layers' in name:
                 d1 = data[:self.intermediate_size, :]
                 name1 = name.replace('gated_layers', 'gated_layers_w')
-                name1 = name1.replace('up_gated_layer', 'gated_layers_v')
                 d2 = data[self.intermediate_size:, :]
                 name2 = name.replace('gated_layers', 'gated_layers_v')
-                name2 = name2.replace('up_gated_layer', 'gated_layers_w')
                 yield name1, d1
                 yield name2, d2
                 continue
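
The branch above splits a fused feed-forward weight into its two halves along the first dimension; a toy sketch of the slicing (shapes hypothetical):

    import torch

    intermediate_size, hidden = 4, 3
    fused = torch.randn(2 * intermediate_size, hidden)  # e.g. '...gated_layers.weight'
    d1 = fused[:intermediate_size, :]   # first half  -> '...gated_layers_w'
    d2 = fused[intermediate_size:, :]   # second half -> '...gated_layers_v'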
@@ -2848,13 +2837,8 @@ def main() -> None:
     hparams = Model.load_hparams(dir_model)
 
     with torch.inference_mode():
-        try:
-            model_class = Model.from_model_architecture(hparams["architectures"][0])
-        except NotImplementedError:
-            logger.error(f"Model {hparams['architectures'][0]} is not supported")
-            sys.exit(1)
-
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
+        model_class = Model.from_model_architecture(hparams["architectures"][0])
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
 
     logger.info("Set model parameters")
     model_instance.set_gguf_parameters()
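
After this hunk, subclasses are constructed with six arguments and no trailing model_name; a minimal sketch of an equivalent direct call (paths and ftype value hypothetical):

    model_class = Model.from_model_architecture("BertModel")
    model_instance = model_class(
        Path("models/bert-base-uncased"),  # dir_model
        gguf.LlamaFileType.MOSTLY_F16,     # ftype
        Path("ggml-model-f16.gguf"),       # fname_out
        False,                             # is_big_endian
        False,                             # use_temp_file
        False,                             # eager
    )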
@@ -2876,3 +2860,4 @@ def main() -> None:
 
 if __name__ == '__main__':
     main()
+