@@ -1979,6 +1979,91 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_add_bos_token(False)
 
 
+@Model.register("Phi3ForCausalLM")
+class Phi3MiniModel(Model):
+    model_arch = gguf.MODEL_ARCH.PHI3
+
+    def set_vocab(self):
+        from sentencepiece import SentencePieceProcessor
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        if not tokenizer_path.is_file():
+            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+            sys.exit(1)
+
+        tokenizer = SentencePieceProcessor(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+
+            piece = tokenizer.id_to_piece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.get_score(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        added_tokens_file = self.dir_model / 'added_tokens.json'
+        if added_tokens_file.is_file():
+            with open(added_tokens_file, "r", encoding="utf-8") as f:
+                added_tokens_json = json.load(f)
+
+                for key in added_tokens_json:
+                    token_id = added_tokens_json[key]
+                    if (token_id >= vocab_size):
+                        print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                        continue
+
+                    tokens[token_id] = key.encode("utf-8")
+                    scores[token_id] = -1000.0
+                    toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
+
+        rot_pct = 1.0
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        rms_eps = self.find_hparam(["rms_norm_eps"])
+
+        self.gguf_writer.add_name("Phi3")
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
+
+        self.gguf_writer.add_embedding_length(n_embd)
+        self.gguf_writer.add_feed_forward_length(8192)
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(n_head)
+        self.gguf_writer.add_head_count_kv(n_head)
+        self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
+        self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+        self.gguf_writer.add_file_type(self.ftype)
+
+
 @Model.register("PlamoForCausalLM")
 class PlamoModel(Model):
     model_arch = gguf.MODEL_ARCH.PLAMO
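
Note on the set_vocab() logic in the added class: the token list is first filled with "[PAD{i}]" placeholders for every id up to the vocab_size declared in the model's hparams, and only the ids actually covered by tokenizer.model (and added_tokens.json) are overwritten afterwards, so a declared vocab size larger than the SentencePiece piece count still yields a complete GGUF token list. A minimal standalone sketch of that padding behaviour; the sizes 32064 and 32000 are assumptions for illustration, not values taken from this diff:

    # assumed hparams['vocab_size'] from config.json (not from this diff)
    config_vocab_size = 32064
    # assumed tokenizer.vocab_size() from tokenizer.model (not from this diff)
    sp_vocab_size = 32000
    # stand-in for the real SentencePiece pieces
    sp_pieces = {i: f"piece_{i}" for i in range(sp_vocab_size)}

    # every id up to config_vocab_size gets a placeholder first ...
    tokens = [f"[PAD{i}]".encode("utf-8") for i in range(config_vocab_size)]
    # ... then only the ids the SentencePiece model covers are overwritten
    for token_id, piece in sp_pieces.items():
        tokens[token_id] = piece.encode("utf-8")

    print(tokens[0])      # b'piece_0'      -- real piece
    print(tokens[32063])  # b'[PAD32063]'   -- padding entry kept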