@@ -1301,15 +1301,23 @@ def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
         except FileNotFoundError:
-            self._set_vocab_llama_hf()
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
-                                          special_token_types=['prefix', 'suffix', 'middle', 'eot'])
-        special_vocab._set_special_token("prefix", 32007)
-        special_vocab._set_special_token("suffix", 32008)
-        special_vocab._set_special_token("middle", 32009)
-        special_vocab._set_special_token("eot",    32010)
-        special_vocab.add_to_gguf(self.gguf_writer)
+            try:
+                self._set_vocab_llama_hf()
+            except (FileNotFoundError, TypeError):
+                # Llama 3
+                self._set_vocab_gpt2()
+
+        # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
+        if self.hparams.get("vocab_size", 32000) == 32016:
+            special_vocab = gguf.SpecialVocab(
+                self.dir_model, load_merges=False,
+                special_token_types=['prefix', 'suffix', 'middle', 'eot']
+            )
+            special_vocab._set_special_token("prefix", 32007)
+            special_vocab._set_special_token("suffix", 32008)
+            special_vocab._set_special_token("middle", 32009)
+            special_vocab._set_special_token("eot",    32010)
+            special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):
         super().set_gguf_parameters()
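The new nested try/except falls back from SentencePiece to the HF Llama tokenizer and finally to the GPT-2/BPE loader that Llama 3 needs, while the vocab-size gate keeps the CodeLlama infill tokens out of Llama 3 GGUFs. A minimal sketch of that gate (the token IDs are the ones the diff sets; the helper itself is illustrative, not the converter's API):

```python
# Illustrative sketch of the vocab-size gate above.
def fim_tokens_for(vocab_size):
    # CodeLlama extends the base 32000-token Llama vocab to 32016 and places
    # its infill tokens in that extra range; Llama 3 (128256) has none.
    if vocab_size != 32016:
        return None
    return {"prefix": 32007, "suffix": 32008, "middle": 32009, "eot": 32010}

assert fim_tokens_for(32016) is not None   # CodeLlama: export FIM tokens
assert fim_tokens_for(128256) is None      # Llama 3: skip
assert fim_tokens_for(32000) is None       # base Llama 2: skip
```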
@@ -2194,6 +2202,8 @@ def set_vocab(self):
         old_eos = special_vocab.special_token_ids["eos"]
         if "chat" in os.path.basename(self.dir_model.absolute()):
             # For the chat model, we replace the eos with '<|im_end|>'.
+            # TODO: this is a hack, should be fixed
+            #       https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048
             special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
             print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
 in chat mode so that the conversation can end normally.")
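This hunk only annotates the existing behavior: chat checkpoints end turns on '<|im_end|>' rather than the base model's EOS, so the exported EOS is swapped. A hedged sketch of that swap, assuming a plain token-to-id map (the converter's `_try_get_sft_eos` performs the equivalent lookup against the real tokenizer; the IDs below are illustrative):

```python
# Prefer the chat terminator as EOS when the vocab has one.
def pick_eos(token_to_id, default_eos):
    return token_to_id.get("<|im_end|>", default_eos)

chat_vocab = {"</s>": 2, "<|im_end|>": 92542}  # illustrative IDs
assert pick_eos(chat_vocab, 2) == 92542        # chat model: use <|im_end|>
assert pick_eos({"</s>": 2}, 2) == 2           # base model: keep original eos
```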
@@ -2429,12 +2439,15 @@ class GemmaModel(Model):

     def set_vocab(self):
         self._set_vocab_sentencepiece()
+
+        # TODO: these special tokens should be exported only for the CodeGemma family
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
-                                          special_token_types=['prefix', 'suffix', 'middle', 'eot'])
+                                          special_token_types=['prefix', 'suffix', 'middle', 'fsep', 'eot'])
         special_vocab._set_special_token("prefix", 67)
         special_vocab._set_special_token("suffix", 69)
         special_vocab._set_special_token("middle", 68)
-        special_vocab._set_special_token("eot",    70)
+        special_vocab._set_special_token("fsep",   70)
+        special_vocab._set_special_token("eot",    107)
         special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):
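With `fsep` now exported too, token 70 is correctly labeled as the file separator and end-of-turn moves to token 107, giving downstream clients the full infill set. A hedged sketch of how these IDs might be assembled into a prefix-suffix-middle infill prompt (the content token IDs are made up; this is one possible client-side usage, not part of the converter):

```python
# Illustrative only: build a PSM-order infill prompt from the IDs above.
PREFIX, MIDDLE, SUFFIX, FSEP, EOT = 67, 68, 69, 70, 107

def fim_prompt(prefix_ids, suffix_ids):
    # The model is asked to generate the "middle" after this sequence.
    return [PREFIX, *prefix_ids, SUFFIX, *suffix_ids, MIDDLE]

print(fim_prompt([101, 102], [201, 202]))  # [67, 101, 102, 69, 201, 202, 68]
```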
@@ -2523,28 +2536,34 @@ def set_vocab(self):

         field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
         self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]))
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
         self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
         self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
         self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
         self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
         self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
         self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0])

     def set_gguf_parameters(self):
-        d_model = self.find_hparam(["hidden_size", "d_model"])
-        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
+        d_model = self.find_hparam(["hidden_size",       "d_model"])
+        d_conv  = self.find_hparam(["conv_kernel",       "d_conv"],  optional=True) or 4
         d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
-        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
+        d_state = self.find_hparam(["state_size",        "d_state"], optional=True) or 16
         # ceiling division
         # ref: https://stackoverflow.com/a/17511341/22827863
         # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
-        dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
+        dt_rank      = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
         rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5

         # Fail early for models which don't have a block expansion factor of 2
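The `dt_rank` default uses the negative-floor-division idiom the comments reference: because Python's `//` floors toward negative infinity, `-(a // -b)` is an exact integer ceiling of `a / b`. A quick check of the trick:

```python
import math

# -(a // -b) == ceil(a / b): floor toward -inf, negated on both sides.
for d_model in (768, 2560, 4096, 4097):
    assert -(d_model // -16) == math.ceil(d_model / 16)

print(-(2560 // -16))  # 160, e.g. the default dt_rank for a 2560-dim model
```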