
Commit c8297c6

llama : add phi3 support (#6852)
* add explicit phi3 support
* add explicit phi3 support
* remove unused code
* convert : add BOS token
* llama : match EOT token <|end|>
* llama : minor / style
* llama : tabs -> spaces
* convert : fix lint checks

--------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 4e96a81 commit c8297c6

4 files changed, 294 additions and 1 deletion


convert-hf-to-gguf.py

Lines changed: 85 additions & 0 deletions
@@ -1979,6 +1979,91 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_add_bos_token(False)
 
 
+@Model.register("Phi3ForCausalLM")
+class Phi3MiniModel(Model):
+    model_arch = gguf.MODEL_ARCH.PHI3
+
+    def set_vocab(self):
+        from sentencepiece import SentencePieceProcessor
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        if not tokenizer_path.is_file():
+            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+            sys.exit(1)
+
+        tokenizer = SentencePieceProcessor(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+
+            piece = tokenizer.id_to_piece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.get_score(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        added_tokens_file = self.dir_model / 'added_tokens.json'
+        if added_tokens_file.is_file():
+            with open(added_tokens_file, "r", encoding="utf-8") as f:
+                added_tokens_json = json.load(f)
+
+                for key in added_tokens_json:
+                    token_id = added_tokens_json[key]
+                    if (token_id >= vocab_size):
+                        print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                        continue
+
+                    tokens[token_id] = key.encode("utf-8")
+                    scores[token_id] = -1000.0
+                    toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
+
+        rot_pct = 1.0
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        rms_eps = self.find_hparam(["rms_norm_eps"])
+
+        self.gguf_writer.add_name("Phi3")
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
+
+        self.gguf_writer.add_embedding_length(n_embd)
+        self.gguf_writer.add_feed_forward_length(8192)
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(n_head)
+        self.gguf_writer.add_head_count_kv(n_head)
+        self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
+        self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+        self.gguf_writer.add_file_type(self.ftype)
+
+
 @Model.register("PlamoForCausalLM")
 class PlamoModel(Model):
     model_arch = gguf.MODEL_ARCH.PLAMO
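For orientation, here is a minimal standalone sketch of the hparam-to-GGUF mapping that set_gguf_parameters performs. The config values below are assumptions (typical figures reported for Phi-3-mini, e.g. hidden_size 3072 and 32 attention heads), not part of this commit; a real conversion reads them from the model's config.json via self.hparams.

# Standalone sketch of the metadata computed by Phi3MiniModel.set_gguf_parameters().
# The hparams dict below is an assumed Phi-3-mini style config, not read from disk.
hparams = {
    "hidden_size": 3072,
    "num_hidden_layers": 32,
    "num_attention_heads": 32,
    "rms_norm_eps": 1e-05,
    "max_position_embeddings": 4096,
}

n_embd  = hparams["hidden_size"]
n_head  = hparams["num_attention_heads"]
rot_pct = 1.0  # full rotary dimension, as hard-coded in the converter above

print("block_count          =", hparams["num_hidden_layers"])        # 32
print("context_length       =", hparams["max_position_embeddings"])  # 4096
print("embedding_length     =", n_embd)                              # 3072
print("head_count (q = kv)  =", n_head)                              # 32
print("rope_dimension_count =", int(rot_pct * n_embd) // n_head)     # 3072 // 32 = 96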

gguf-py/gguf/constants.py

Lines changed: 16 additions & 0 deletions
@@ -124,6 +124,7 @@ class MODEL_ARCH(IntEnum):
     QWEN2     = auto()
     QWEN2MOE  = auto()
     PHI2      = auto()
+    PHI3      = auto()
     PLAMO     = auto()
     CODESHELL = auto()
     ORION     = auto()
@@ -200,6 +201,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.QWEN2:     "qwen2",
     MODEL_ARCH.QWEN2MOE:  "qwen2moe",
     MODEL_ARCH.PHI2:      "phi2",
+    MODEL_ARCH.PHI3:      "phi3",
     MODEL_ARCH.PLAMO:     "plamo",
     MODEL_ARCH.CODESHELL: "codeshell",
     MODEL_ARCH.ORION:     "orion",
@@ -550,6 +552,20 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHI3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
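To sanity-check the new architecture entry, a small sketch (assuming the gguf-py package from this tree is importable, e.g. via pip install -e gguf-py) that expands the PHI3 tensor list into GGUF tensor names; MODEL_ARCH_NAMES, MODEL_TENSORS and TENSOR_NAMES are the existing lookup tables in this file.

# Sketch: list the GGUF tensor names that constants.py now declares for PHI3.
# Assumes the gguf-py package from this repository is on the Python path.
from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS, TENSOR_NAMES

print("arch:", MODEL_ARCH_NAMES[MODEL_ARCH.PHI3])   # "phi3"
for tensor in MODEL_TENSORS[MODEL_ARCH.PHI3]:
    # TENSOR_NAMES holds format strings such as "blk.{bid}.attn_qkv"
    print(TENSOR_NAMES[tensor].format(bid=0))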

gguf-py/gguf/tensor_mapping.py

Lines changed: 2 additions & 0 deletions
@@ -117,6 +117,7 @@ class TensorNameMap:
            "h.{bid}.attn.c_attn",                  # gpt2
            "transformer.h.{bid}.mixer.Wqkv",       # phi2
            "encoder.layers.{bid}.attn.Wqkv",       # nomic-bert
+           "model.layers.{bid}.self_attn.qkv_proj" # phi3
        ),
 
        # Attention query
@@ -234,6 +235,7 @@ class TensorNameMap:
            "h.{bid}.mlp.c_fc",                       # gpt2
            "transformer.h.{bid}.mlp.fc1",            # phi2
            "model.layers.{bid}.mlp.fc1",             # phi2
+           "model.layers.{bid}.mlp.gate_up_proj",    # phi3
            "model.layers.layers.{bid}.mlp.up_proj",  # plamo
            "model.layers.{bid}.feed_forward.w3",     # internlm2
            "encoder.layers.{bid}.mlp.fc11",          # nomic-bert
