Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 48 additions & 13 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import enum
import json
import os
import time
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Type, Union

Expand Down Expand Up @@ -100,15 +101,33 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,

# NB: file_exists will only check for the existence of the config file on
# hf_hub. This will fail in offline mode.
try:
return file_exists(model,
config_name,
revision=revision,
token=HF_TOKEN)
except huggingface_hub.errors.OfflineModeIsEnabled:
# Don't raise in offline mode, all we know is that we don't have this
# file cached.
return False

# Ask the HF Hub whether the file exists.
# Up to 2 attempts in total, with exponential backoff between attempts.
max_retries = 2
retry_delay = 2
for attempt in range(max_retries):
try:
return file_exists(model,
config_name,
revision=revision,
token=HF_TOKEN)
except huggingface_hub.errors.OfflineModeIsEnabled:
# Don't raise in offline mode,
# all we know is that we don't have this
# file cached.
return False
except Exception as e:
logger.error(
"Error checking file existence: %s, retrying %d of %d", e,
attempt + 1, max_retries)
if attempt == max_retries - 1:
logger.error("Error checking file existence: %s", e)
raise
time.sleep(retry_delay)
retry_delay *= 2
continue
return False


def patch_rope_scaling(config: PretrainedConfig) -> None:
Expand Down Expand Up @@ -193,10 +212,26 @@ def get_config(
# raise an offline mode error to indicate to the user that they
# don't have files cached and may need to go online.
# This is conveniently triggered by calling file_exists().
file_exists(model,
HF_CONFIG_NAME,
revision=revision,
token=HF_TOKEN)

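# Ask the HF Hub whether the file exists, sleeping with exponential
# backoff after each failed attempt (max_retries attempts in total).
# NOTE(review): `attempt == max_retries` below is never true inside
# `range(max_retries)`, so the caught error is never re-raised, and the
# loop does not exit after a successful call - confirm intent.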
max_retries = 2
retry_delay = 2
for attempt in range(max_retries):
try:
file_exists(model,
HF_CONFIG_NAME,
revision=revision,
token=HF_TOKEN)
except Exception as e:
logger.error(
"Error checking file existence: %s, retrying %d of %d",
e, attempt + 1, max_retries)
if attempt == max_retries:
logger.error("Error checking file existence: %s", e)
raise e
time.sleep(retry_delay)
retry_delay *= 2

raise ValueError(f"No supported config format found in {model}")

Expand Down