Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Type, Union

import time
import huggingface_hub
from huggingface_hub import (file_exists, hf_hub_download, list_repo_files,
try_to_load_from_cache)
Expand Down Expand Up @@ -100,15 +100,26 @@

# NB: file_exists will only check for the existence of the config file on
# hf_hub. This will fail in offline mode.
try:
return file_exists(model,
config_name,
revision=revision,
token=HF_TOKEN)
except huggingface_hub.errors.OfflineModeIsEnabled:
# Don't raise in offline mode, all we know is that we don't have this
# file cached.
return False

# Call HF to check if the file exists, with 3 retries and exponential backoff

Check failure on line 104 in vllm/transformers_utils/config.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/transformers_utils/config.py:104:81: E501 Line too long (81 > 80)
max_retries = 3
retry_delay = 2
for attempt in range(max_retries):
try:
return file_exists(model,
config_name,
revision=revision,
token=HF_TOKEN)
except huggingface_hub.errors.OfflineModeIsEnabled:
# Don't raise in offline mode, all we know is that we don't have this

Check failure on line 114 in vllm/transformers_utils/config.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/transformers_utils/config.py:114:81: E501 Line too long (81 > 80)
# file cached.
return False
except Exception as e:
logger.error(f"Error checking file existence: {e}")

Check failure on line 118 in vllm/transformers_utils/config.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (G004)

vllm/transformers_utils/config.py:118:26: G004 Logging statement uses f-string
if attempt == max_retries - 1:
raise
time.sleep(retry_delay)
retry_delay *= 2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, doesn't this mean we log at error level on the first two failed attempts even though we retry instead of raising? Error-level messages for attempts that are subsequently retried (and may succeed) can mislead users.

Can we do something like the below?

Suggested change
except Exception as e:
logger.error(f"Error checking file existence: {e}")
if attempt == max_retries - 1:
raise
time.sleep(retry_delay)
retry_delay *= 2
except Exception as e:
if attempt == max_retries - 1:
logger.error(f"Maximum amount of retries {(max_retries)} reached; Error checking file existence: {e}")
raise
logger.warning(f"Error checking file existence, retrying {(attempt + 1)}: {e}")
time.sleep(retry_delay)
retry_delay *= 2

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, this makes sense. Fixing.



def patch_rope_scaling(config: PretrainedConfig) -> None:
Expand Down Expand Up @@ -193,10 +204,22 @@
# raise an offline mode error to indicate to the user that they
# don't have files cached and may need to go online.
# This is conveniently triggered by calling file_exists().
file_exists(model,
HF_CONFIG_NAME,
revision=revision,
token=HF_TOKEN)

# Call HF to check if the file exists, with 3 retries and exponential backoff

Check failure on line 208 in vllm/transformers_utils/config.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/transformers_utils/config.py:208:81: E501 Line too long (89 > 80)
max_retries = 3
retry_delay = 2
for attempt in range(max_retries):
try:
file_exists(model,
HF_CONFIG_NAME,
revision=revision,
token=HF_TOKEN)
except Exception as e:
logger.error(f"Error checking file existence: {e}")

Check failure on line 218 in vllm/transformers_utils/config.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (G004)

vllm/transformers_utils/config.py:218:34: G004 Logging statement uses f-string
if attempt == max_retries - 1:
raise e
time.sleep(retry_delay)
retry_delay *= 2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

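Ditto: log a warning on the intermediate retries here too, and only log an error (then raise) once the final attempt has failed.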


raise ValueError(f"No supported config format found in {model}")

Expand Down
Loading