Skip to content

Commit 59bafa9

Browse files
Sang-Kil Park, hodlen
Sang-Kil Park authored and hodlen committed
py : improve BPE tokenizer support (ggml-org#5189)
1 parent 4d54544 commit 59bafa9

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

convert.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -334,7 +334,10 @@ def load(model_plus: ModelPlus) -> Params:
334334
class BpeVocab:
335335
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
336336
self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
337-
self.vocab = self.bpe_tokenizer["model"]["vocab"]
337+
try:
338+
self.vocab = self.bpe_tokenizer["model"]["vocab"]
339+
except:
340+
self.vocab = self.bpe_tokenizer
338341
added_tokens: dict[str, int]
339342
if fname_added_tokens is not None:
340343
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.

0 commit comments

Comments
 (0)