Skip to content

Commit aa90c83

Browse files
committed
Ignore unusable JSON values
1 parent cd156d7 commit aa90c83

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

convert.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -190,21 +190,22 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path], fn
190190
else:
191191
tokenizer_config = {}
192192
for key, value in tokenizer_config.items():
193-
assert isinstance(value, dict) or isinstance(value, str)
194-
if key not in TOKEN_NAME_TO_ID or TOKEN_NAME_TO_ID[key] == -1:
193+
if not isinstance(value, dict) or not isinstance(value, str):
195194
continue
196-
self.special_tokens_map[TOKEN_NAME_TO_ID[key]] = value["content"] if isinstance(value, dict) else value
195+
token_id = TOKEN_NAME_TO_ID.get(key, -1)
196+
if token_id == -1:
197+
continue
198+
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
197199

198200
special_tokens: Dict[str, Any]
199201
if fname_special_tokens is not None:
200202
special_tokens = json.load(open(fname_special_tokens))
201203
else:
202204
special_tokens = {}
203205
for key, value in special_tokens.items():
204-
assert isinstance(value, dict) or isinstance(value, str)
205-
if key not in TOKEN_NAME_TO_ID:
206+
if not isinstance(value, dict) or not isinstance(value, str):
206207
continue
207-
token_id = TOKEN_NAME_TO_ID[key]
208+
token_id = TOKEN_NAME_TO_ID.get(key, -1)
208209
if token_id == -1 or token_id in self.special_tokens_map:
209210
continue
210211
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value

0 commit comments

Comments (0)