
Commit e276e4b

Fix convert-falcon-hf-to-gguf.py for rw models
1 parent fa3582f commit e276e4b

File tree

1 file changed: 22 additions & 10 deletions

convert-falcon-hf-to-gguf.py

Lines changed: 22 additions & 10 deletions
@@ -80,7 +80,7 @@ def count_model_parts(dir_model: str) -> int:
 with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "RWForCausalLM":
+if hparams["architectures"][0] not in ("RWForCausalLM", "FalconForCausalLM"):
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit()
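
For context, the architectures field comes from the model's Hugging Face config.json. Older Falcon ("RW") checkpoints that ship custom modelling code report RWForCausalLM, while checkpoints updated for the Falcon support in transformers report FalconForCausalLM; the relaxed check accepts both. A minimal sketch of that check, with illustrative config fragments rather than values copied from any real checkpoint:

# Illustrative config fragments only; real values come from each model's config.json.
rw_style_config = {"architectures": ["RWForCausalLM"], "n_layer": 32, "n_head": 71}
falcon_style_config = {"architectures": ["FalconForCausalLM"],
                       "num_hidden_layers": 32, "num_attention_heads": 71}

for hparams in (rw_style_config, falcon_style_config):
    arch = hparams["architectures"][0]
    if arch not in ("RWForCausalLM", "FalconForCausalLM"):
        print("Model architecture not supported: " + arch)
    else:
        print(arch + ": supported")
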
@@ -93,19 +93,34 @@ def count_model_parts(dir_model: str) -> int:
 
 print("gguf: get model metadata")
 
-block_count = hparams["n_layer"]
+if "n_layer" in hparams:
+    block_count = hparams["n_layer"]
+elif "num_hidden_layers" in hparams:
+    block_count = hparams["num_hidden_layers"]
+else:
+    print("No block count found")
+
+    sys.exit()
+
+if "n_head" in hparams:
+    n_head = hparams["n_head"]
+elif "num_attention_heads" in hparams:
+    n_head = hparams["num_attention_heads"]
+else:
+    print("No head count found")
+
+    sys.exit()
+
+n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["n_head"])
-if "n_head_kv" in hparams:
-    gguf_writer.add_head_count_kv(hparams["n_head_kv"])
-else:
-    gguf_writer.add_head_count_kv(1)
+gguf_writer.add_head_count(n_head)
+gguf_writer.add_head_count_kv(n_head_kv)
 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 gguf_writer.add_file_type(ftype)
 
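The same present-key fallback repeats for the block count and the head count, with n_head_kv defaulting to 1 when the key is absent. A minimal sketch of how that lookup could be factored into a single hypothetical helper (get_hparam and the sample values below are illustrative, not part of the script):

import sys

def get_hparam(hparams, keys, default=None):
    # Return the value of the first key present in hparams; otherwise fall back
    # to the default, or exit with a message as the if/elif/else blocks above do.
    for key in keys:
        if key in hparams:
            return hparams[key]
    if default is not None:
        return default
    print("No value found for any of: " + ", ".join(keys))
    sys.exit()

# Illustrative values only; the script reads these from config.json.
hparams = {"num_hidden_layers": 32, "num_attention_heads": 71}

block_count = get_hparam(hparams, ["n_layer", "num_hidden_layers"])
n_head = get_hparam(hparams, ["n_head", "num_attention_heads"])
n_head_kv = get_hparam(hparams, ["n_head_kv"], default=1)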

@@ -190,9 +205,6 @@ def count_model_parts(dir_model: str) -> int:
 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # params for qkv transform
-n_head = hparams["n_head"]
-n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
-
 head_dim = hparams["hidden_size"] // n_head
 
 # tensor info
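
The local n_head and n_head_kv assignments are dropped here because both values are now resolved once, with the key fallbacks, earlier in the script; the old hparams["n_head"] lookup would fail with a KeyError on configs that only provide num_attention_heads. The head_dim computation itself is unchanged; as a rough check, assuming the commonly cited 7B Falcon shape of hidden_size 4544 with 71 heads:

# Illustrative numbers for a 7B-class Falcon config (assumed, not read from a file).
hidden_size = 4544
n_head = 71

head_dim = hidden_size // n_head
print(head_dim)  # 64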
