Skip to content

Commit 3292733

Browse files
authored
convert : skip unaccessible HF repos (#7210)
1 parent 9886313 commit 3292733

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

convert-hf-to-gguf-update.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,17 @@ def download_file_with_auth(url, token, save_path):
145145
if tokt == TOKENIZER_TYPE.SPM:
146146
continue
147147

148+
# Skip if the tokenizer folder does not exist or there were other download issues previously
149+
if not os.path.exists(f"models/tokenizers/{name}"):
150+
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
151+
continue
152+
148153
# create the tokenizer
149-
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
154+
try:
155+
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
156+
except OSError as e:
157+
logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
158+
continue # Skip to the next model if the tokenizer can't be loaded
150159

151160
chktok = tokenizer.encode(chktxt)
152161
chkhsh = sha256(str(chktok).encode()).hexdigest()
@@ -287,8 +296,17 @@ def get_vocab_base_pre(self, tokenizer) -> str:
287296
name = model["name"]
288297
tokt = model["tokt"]
289298

299+
# Skip if the tokenizer folder does not exist or there were other download issues previously
300+
if not os.path.exists(f"models/tokenizers/{name}"):
301+
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
302+
continue
303+
290304
# create the tokenizer
291-
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
305+
try:
306+
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
307+
except OSError as e:
308+
logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
309+
continue # Skip this model and continue with the next one in the loop
292310

293311
with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
294312
for text in tests:

0 commit comments

Comments
 (0)