     TextInput,
     TruncationStrategy,
 )
-from .utils import PaddingStrategy, add_end_docstrings, logging
+from .utils import PaddingStrategy, add_end_docstrings, is_offline_mode, logging


 logger = logging.get_logger(__name__)
@@ -219,6 +219,7 @@ def __init__(self, *args, **kwargs): |

         # Optionally patches mistral tokenizers with wrong regex
         if vocab_size > 100000 and getattr(self._tokenizer, "pre_tokenizer", None) is not None:
+            kwargs.pop("tokenizer", None)
             self._tokenizer = self._patch_mistral_regex(
                 self._tokenizer,
                 self.init_kwargs.get("name_or_path", None),
@@ -1089,7 +1090,12 @@ def is_base_mistral(model_id: str) -> bool: |
                 return True
         return False

-        if pretrained_model_name_or_path is not None and (is_local or is_base_mistral(pretrained_model_name_or_path)):
+        if is_offline_mode():
+            is_local = True
+
+        if pretrained_model_name_or_path is not None and (
+            is_local or (not is_local and is_base_mistral(pretrained_model_name_or_path))
+        ):
             _config_file = cached_file(
                 pretrained_model_name_or_path,
                 "config.json",
@@ -1126,7 +1132,7 @@ def is_base_mistral(model_id: str) -> bool: |
             ]
         ):
             return tokenizer
-        elif transformers_version and version.parse(transformers_version) >= version.parse("5.0.0"):
+        elif transformers_version and version.parse(transformers_version) >= version.parse("4.57.3"):
             return tokenizer

         mistral_config_detected = True