@@ -80,7 +80,7 @@ def count_model_parts(dir_model: str) -> int:
 with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "RWForCausalLM":
+if hparams["architectures"][0] not in ("RWForCausalLM", "FalconForCausalLM"):
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit()
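The widened check reflects a naming change rather than a new model: the original tiiuae checkpoints ship custom modelling code whose class reports "RWForCausalLM", while configs written since Falcon was upstreamed into transformers report "FalconForCausalLM". As a minimal sketch (the paths below are hypothetical), this is how to see which name a local checkpoint uses before converting:

    import json

    # Hypothetical local checkpoint directories; substitute your own.
    for path in ("falcon-7b/config.json", "falcon-40b/config.json"):
        with open(path, "r", encoding="utf-8") as f:
            arch = json.load(f)["architectures"][0]
        print(path, "->", arch)  # "RWForCausalLM" or "FalconForCausalLM"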
@@ -93,19 +93,34 @@ def count_model_parts(dir_model: str) -> int:
 
 print("gguf: get model metadata")
 
-block_count = hparams["n_layer"]
+if "n_layer" in hparams:
+    block_count = hparams["n_layer"]
+elif "num_hidden_layers" in hparams:
+    block_count = hparams["num_hidden_layers"]
+else:
+    print("No block count found")
+
+    sys.exit()
+
+if "n_head" in hparams:
+    n_head = hparams["n_head"]
+elif "num_attention_heads" in hparams:
+    n_head = hparams["num_attention_heads"]
+else:
+    print("No head count found")
+
+    sys.exit()
+
+n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["n_head"])
-if "n_head_kv" in hparams:
-    gguf_writer.add_head_count_kv(hparams["n_head_kv"])
-else:
-    gguf_writer.add_head_count_kv(1)
+gguf_writer.add_head_count(n_head)
+gguf_writer.add_head_count_kv(n_head_kv)
 
 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 gguf_writer.add_file_type(ftype)
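The new fallback chains exist because the transformers port also renamed the hyperparameter keys: "n_layer" became "num_hidden_layers" and "n_head" became "num_attention_heads". "n_head_kv" is simply absent from multi-query checkpoints such as Falcon-7B, where one shared key/value head is the correct default. The same lookups could be folded into a small helper; a sketch only, with first_present being a hypothetical name rather than part of the converter:

    import sys

    def first_present(hparams: dict, keys: tuple, default=None):
        # Return the first alias present in the config, else the default,
        # else bail out like the explicit if/elif chains above.
        for key in keys:
            if key in hparams:
                return hparams[key]
        if default is not None:
            return default
        print("No value found for any of: " + ", ".join(keys))
        sys.exit()

    # Equivalent to the chains in this hunk:
    # block_count = first_present(hparams, ("n_layer", "num_hidden_layers"))
    # n_head      = first_present(hparams, ("n_head", "num_attention_heads"))
    # n_head_kv   = first_present(hparams, ("n_head_kv",), default=1)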
@@ -190,9 +205,6 @@ def count_model_parts(dir_model: str) -> int:
 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # params for qkv transform
-n_head = hparams["n_head"]
-n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
-
 head_dim = hparams["hidden_size"] // n_head
 
 # tensor info
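With n_head and n_head_kv now resolved once during the metadata step, re-reading them from hparams before the qkv transform would duplicate the lookup and miss the renamed keys, so those lines drop out; only head_dim still needs deriving here. As a concrete check against the published Falcon-7B hyperparameters (hidden_size 4544, n_head 71, multi-query attention so n_head_kv falls back to 1):

    hidden_size = 4544   # Falcon-7B
    n_head = 71
    n_head_kv = 1        # multi-query attention: one shared K/V head

    head_dim = hidden_size // n_head
    print(head_dim)  # 64, since 71 * 64 == 4544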