|
29 | 29 |
|
30 | 30 | logger = logging.get_logger(__name__) |
31 | 31 |
|
32 | | - |
33 | 32 | MODEL_MAPPING_NAMES = OrderedDict( |
34 | 33 | [ |
35 | 34 | # Base model mapping |
|
478 | 477 | ] |
479 | 478 | ) |
480 | 479 |
|
| 480 | +MODEL_FOR_IMAGE_MAPPING_NAMES = OrderedDict( |
| 481 | + [ |
| 482 | + # Image model mapping (model type -> model class name) |
| 483 | + ("beit", "BeitModel"), |
| 484 | + ("bit", "BitModel"), |
| 485 | + ("conditional_detr", "ConditionalDetrModel"), |
| 486 | + ("convnext", "ConvNextModel"), |
| 487 | + ("convnextv2", "ConvNextV2Model"), |
| 488 | + ("data2vec-vision", "Data2VecVisionModel"), |
| 489 | + ("deformable_detr", "DeformableDetrModel"), |
| 490 | + ("deit", "DeiTModel"), |
| 491 | + ("deta", "DetaModel"), |
| 492 | + ("detr", "DetrModel"), |
| 493 | + ("dinat", "DinatModel"), |
| 494 | + ("dinov2", "Dinov2Model"), |
| 495 | + ("dpt", "DPTModel"), |
| 496 | + ("efficientformer", "EfficientFormerModel"), |
| 497 | + ("efficientnet", "EfficientNetModel"), |
| 498 | + ("focalnet", "FocalNetModel"), |
| 499 | + ("glpn", "GLPNModel"), |
| 500 | + ("imagegpt", "ImageGPTModel"), |
| 501 | + ("levit", "LevitModel"), |
| 502 | + ("mobilenet_v1", "MobileNetV1Model"), |
| 503 | + ("mobilenet_v2", "MobileNetV2Model"), |
| 504 | + ("mobilevit", "MobileViTModel"), |
| 505 | + ("mobilevitv2", "MobileViTV2Model"), |
| 506 | + ("nat", "NatModel"), |
| 507 | + ("poolformer", "PoolFormerModel"), |
| 508 | + ("pvt", "PvtModel"), |
| 509 | + ("regnet", "RegNetModel"), |
| 510 | + ("resnet", "ResNetModel"), |
| 511 | + ("segformer", "SegformerModel"), |
| 512 | + ("siglip_vision_model", "SiglipVisionModel"), |
| 513 | + ("swiftformer", "SwiftFormerModel"), |
| 514 | + ("swin", "SwinModel"), |
| 515 | + ("swin2sr", "Swin2SRModel"), |
| 516 | + ("swinv2", "Swinv2Model"), |
| 517 | + ("table-transformer", "TableTransformerModel"), |
| 518 | + ("timesformer", "TimesformerModel"), |
| 519 | + ("timm_backbone", "TimmBackbone"), |
| 520 | + ("van", "VanModel"), |
| 521 | + ("videomae", "VideoMAEModel"), |
| 522 | + ("vit", "ViTModel"), |
| 523 | + ("vit_hybrid", "ViTHybridModel"), |
| 524 | + ("vit_mae", "ViTMAEModel"), |
| 525 | + ("vit_msn", "ViTMSNModel"), |
| 526 | + ("vitdet", "VitDetModel"), |
| 527 | + ("vivit", "VivitModel"), |
| 528 | + ("yolos", "YolosModel"), |
| 529 | + ] |
| 530 | +) |
| 531 | + |
481 | 532 | MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict( |
482 | 533 | [ |
483 | 534 | ("deit", "DeiTForMaskedImageModeling"), |
|
1243 | 1294 | CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES |
1244 | 1295 | ) |
1245 | 1296 | MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES) |
| 1297 | +MODEL_FOR_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_MAPPING_NAMES) |
1246 | 1298 | MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping( |
1247 | 1299 | CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES |
1248 | 1300 | ) |
|
0 commit comments