Skip to content

Commit 7c4995f

Browse files
Add feature extraction mapping for automatic metadata update (#28944)
* add feature extraction mapping
* added prefix
* ruff check
* minor fix
* Update modeling_auto.py
* fix typo
* remove prefix to make variable public/importable
* Update src/transformers/models/auto/modeling_auto.py

Co-authored-by: amyeroberts <[email protected]>

* fixes
* addressed comments
* nit
* fix-copies
* remove from tests
* this should fix
* Update tests/models/convnextv2/test_modeling_convnextv2.py

Co-authored-by: amyeroberts <[email protected]>

* nits

---------

Co-authored-by: amyeroberts <[email protected]>
1 parent 2a7746c commit 7c4995f

File tree

9 files changed

+73
-3
lines changed

9 files changed

+73
-3
lines changed

src/transformers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,6 +1460,7 @@
14601460
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
14611461
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
14621462
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
1463+
"MODEL_FOR_IMAGE_MAPPING",
14631464
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
14641465
"MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
14651466
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
@@ -6203,6 +6204,7 @@
62036204
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
62046205
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
62056206
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
6207+
MODEL_FOR_IMAGE_MAPPING,
62066208
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
62076209
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
62086210
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,

src/transformers/models/auto/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
5050
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
5151
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
52+
"MODEL_FOR_IMAGE_MAPPING",
5253
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
5354
"MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
5455
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
@@ -233,6 +234,7 @@
233234
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
234235
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
235236
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
237+
MODEL_FOR_IMAGE_MAPPING,
236238
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
237239
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
238240
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,

src/transformers/models/auto/modeling_auto.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
logger = logging.get_logger(__name__)
3131

32-
3332
MODEL_MAPPING_NAMES = OrderedDict(
3433
[
3534
# Base model mapping
@@ -478,6 +477,58 @@
478477
]
479478
)
480479

480+
MODEL_FOR_IMAGE_MAPPING_NAMES = OrderedDict(
481+
[
482+
# Model for Image mapping
483+
("beit", "BeitModel"),
484+
("bit", "BitModel"),
485+
("conditional_detr", "ConditionalDetrModel"),
486+
("convnext", "ConvNextModel"),
487+
("convnextv2", "ConvNextV2Model"),
488+
("data2vec-vision", "Data2VecVisionModel"),
489+
("deformable_detr", "DeformableDetrModel"),
490+
("deit", "DeiTModel"),
491+
("deta", "DetaModel"),
492+
("detr", "DetrModel"),
493+
("dinat", "DinatModel"),
494+
("dinov2", "Dinov2Model"),
495+
("dpt", "DPTModel"),
496+
("efficientformer", "EfficientFormerModel"),
497+
("efficientnet", "EfficientNetModel"),
498+
("focalnet", "FocalNetModel"),
499+
("glpn", "GLPNModel"),
500+
("imagegpt", "ImageGPTModel"),
501+
("levit", "LevitModel"),
502+
("mobilenet_v1", "MobileNetV1Model"),
503+
("mobilenet_v2", "MobileNetV2Model"),
504+
("mobilevit", "MobileViTModel"),
505+
("mobilevitv2", "MobileViTV2Model"),
506+
("nat", "NatModel"),
507+
("poolformer", "PoolFormerModel"),
508+
("pvt", "PvtModel"),
509+
("regnet", "RegNetModel"),
510+
("resnet", "ResNetModel"),
511+
("segformer", "SegformerModel"),
512+
("siglip_vision_model", "SiglipVisionModel"),
513+
("swiftformer", "SwiftFormerModel"),
514+
("swin", "SwinModel"),
515+
("swin2sr", "Swin2SRModel"),
516+
("swinv2", "Swinv2Model"),
517+
("table-transformer", "TableTransformerModel"),
518+
("timesformer", "TimesformerModel"),
519+
("timm_backbone", "TimmBackbone"),
520+
("van", "VanModel"),
521+
("videomae", "VideoMAEModel"),
522+
("vit", "ViTModel"),
523+
("vit_hybrid", "ViTHybridModel"),
524+
("vit_mae", "ViTMAEModel"),
525+
("vit_msn", "ViTMSNModel"),
526+
("vitdet", "VitDetModel"),
527+
("vivit", "VivitModel"),
528+
("yolos", "YolosModel"),
529+
]
530+
)
531+
481532
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
482533
[
483534
("deit", "DeiTForMaskedImageModeling"),
@@ -1243,6 +1294,7 @@
12431294
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
12441295
)
12451296
MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
1297+
MODEL_FOR_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_MAPPING_NAMES)
12461298
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
12471299
CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
12481300
)

src/transformers/trainer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@
6363
from .integrations.tpu import tpu_spmd_dataloader
6464
from .modelcard import TrainingSummary
6565
from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
66-
from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
66+
from .models.auto.modeling_auto import (
67+
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
68+
MODEL_MAPPING_NAMES,
69+
)
6770
from .optimization import Adafactor, get_scheduler
6871
from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
6972
from .tokenization_utils_base import PreTrainedTokenizerBase

src/transformers/utils/dummy_pt_objects.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,9 @@ def __init__(self, *args, **kwargs):
598598
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
599599

600600

601+
MODEL_FOR_IMAGE_MAPPING = None
602+
603+
601604
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING = None
602605

603606

src/transformers/utils/fx.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
MODEL_FOR_CTC_MAPPING_NAMES,
4040
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
4141
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
42+
MODEL_FOR_IMAGE_MAPPING_NAMES,
4243
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
4344
MODEL_FOR_MASKED_LM_MAPPING_NAMES,
4445
MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
@@ -95,6 +96,7 @@ def _generate_supported_model_class_names(
9596
"audio-classification": MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
9697
"semantic-segmentation": MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
9798
"backbone": MODEL_FOR_BACKBONE_MAPPING_NAMES,
99+
"image-feature-extraction": MODEL_FOR_IMAGE_MAPPING_NAMES,
98100
}
99101

100102
if supported_tasks is None:

tests/test_modeling_common.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,7 +700,10 @@ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=No
700700
for model_class in self.all_model_classes:
701701
if (
702702
model_class.__name__
703-
in [*get_values(MODEL_MAPPING_NAMES), *get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES)]
703+
in [
704+
*get_values(MODEL_MAPPING_NAMES),
705+
*get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES),
706+
]
704707
or not model_class.supports_gradient_checkpointing
705708
):
706709
continue

utils/check_repo.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,8 @@ def check_all_auto_object_names_being_defined():
732732
# module, if it's a private model defined in this file.
733733
if name.endswith("MODEL_MAPPING_NAMES") and is_a_private_model(class_name):
734734
continue
735+
if name.endswith("MODEL_FOR_IMAGE_MAPPING_NAMES") and is_a_private_model(class_name):
736+
continue
735737
failures.append(
736738
f"`{class_name}` appears in the mapping `{name}` but it is not defined in the library."
737739
)

utils/update_metadata.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
PIPELINE_TAGS_AND_AUTO_MODELS = [
6363
("pretraining", "MODEL_FOR_PRETRAINING_MAPPING_NAMES", "AutoModelForPreTraining"),
6464
("feature-extraction", "MODEL_MAPPING_NAMES", "AutoModel"),
65+
("image-feature-extraction", "MODEL_FOR_IMAGE_MAPPING_NAMES", "AutoModel"),
6566
("audio-classification", "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES", "AutoModelForAudioClassification"),
6667
("text-generation", "MODEL_FOR_CAUSAL_LM_MAPPING_NAMES", "AutoModelForCausalLM"),
6768
("automatic-speech-recognition", "MODEL_FOR_CTC_MAPPING_NAMES", "AutoModelForCTC"),

0 commit comments

Comments
 (0)