Skip to content

Commit ce00528

Browse files
committed
convert.py: Set gpt2 as tokenizer model when using BPE
1 parent 58fa4dc commit ce00528

File tree

1 file changed

+6 -1 lines changed

1 file changed

+6
-1
lines changed

convert.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -846,7 +846,12 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
846 846
scores.append(score)
847 847
toktypes.append(toktype)
848 848

849-
self.gguf.add_tokenizer_model("llama")
849+
if isinstance(vocab, SentencePieceVocab):
850+
self.gguf.add_tokenizer_model("llama")
851+
elif isinstance(vocab, BpeVocab):
852+
self.gguf.add_tokenizer_model("gpt2")
853+
else:
854+
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
850 855
self.gguf.add_token_list(tokens)
851 856
self.gguf.add_token_scores(scores)
852 857
self.gguf.add_token_types(toktypes)

0 commit comments

Comments
 (0)