
Commit 0a1ff97

Merge branch 'martinsbruveris-add-vit-b8'

2 parents cfa414c + 9b5d6dc

File tree: 1 file changed (+28, -0 lines)

timm/models/vision_transformer.py

Lines changed: 28 additions & 0 deletions
@@ -88,6 +88,9 @@ def _cfg(url='', **kwargs):
         url='https://storage.googleapis.com/vit_models/augreg/'
             'B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz',
         input_size=(3, 384, 384), crop_pct=1.0),
+    'vit_base_patch8_224': _cfg(
+        url='https://storage.googleapis.com/vit_models/augreg/'
+            'B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz'),
     'vit_large_patch32_224': _cfg(
         url='',  # no official model weights for this combo, only for in21k
         ),
@@ -118,6 +121,9 @@ def _cfg(url='', **kwargs):
     'vit_base_patch16_224_in21k': _cfg(
         url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
         num_classes=21843),
+    'vit_base_patch8_224_in21k': _cfg(
+        url='https://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
+        num_classes=21843),
     'vit_large_patch32_224_in21k': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth',
         num_classes=21843),
@@ -640,6 +646,16 @@ def vit_base_patch16_384(pretrained=False, **kwargs):
     return model
 
 
+@register_model
+def vit_base_patch8_224(pretrained=False, **kwargs):
+    """ ViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
+    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
+    """
+    model_kwargs = dict(patch_size=8, embed_dim=768, depth=12, num_heads=12, **kwargs)
+    model = _create_vision_transformer('vit_base_patch8_224', pretrained=pretrained, **model_kwargs)
+    return model
+
+
 @register_model
 def vit_large_patch32_224(pretrained=False, **kwargs):
     """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
@@ -756,6 +772,18 @@ def vit_base_patch16_224_in21k(pretrained=False, **kwargs):
     return model
 
 
+@register_model
+def vit_base_patch8_224_in21k(pretrained=False, **kwargs):
+    """ ViT-Base model (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
+    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
+    NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
+    """
+    model_kwargs = dict(
+        patch_size=8, embed_dim=768, depth=12, num_heads=12, **kwargs)
+    model = _create_vision_transformer('vit_base_patch8_224_in21k', pretrained=pretrained, **model_kwargs)
+    return model
+
+
 @register_model
 def vit_large_patch32_224_in21k(pretrained=False, **kwargs):
     """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).

0 commit comments