diff --git a/docs/source/models.rst b/docs/source/models.rst
index 4daee5d5534..82eb3170e78 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -89,7 +89,6 @@ You can construct a model with random weights by calling its constructor:
     vit_b_32 = models.vit_b_32()
     vit_l_16 = models.vit_l_16()
     vit_l_32 = models.vit_l_32()
-    vit_h_14 = models.vit_h_14()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 These can be constructed by passing ``pretrained=True``:
@@ -464,7 +463,6 @@ VisionTransformer
     vit_b_32
     vit_l_16
     vit_l_32
-    vit_h_14
 
 Quantized Models
 ----------------
diff --git a/hubconf.py b/hubconf.py
index 1b3b191efa4..2b2eeb1c166 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -63,5 +63,4 @@
     vit_b_32,
     vit_l_16,
     vit_l_32,
-    vit_h_14,
 )
diff --git a/test/expect/ModelTester.test_vit_h_14_expect.pkl b/test/expect/ModelTester.test_vit_h_14_expect.pkl
deleted file mode 100644
index 1f846beb6a0..00000000000
Binary files a/test/expect/ModelTester.test_vit_h_14_expect.pkl and /dev/null differ
diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py
index a64f342e1a0..9037b7b1c27 100644
--- a/torchvision/models/vision_transformer.py
+++ b/torchvision/models/vision_transformer.py
@@ -15,7 +15,6 @@
     "vit_b_32",
     "vit_l_16",
     "vit_l_32",
-    "vit_h_14",
 ]
 
 model_urls = {
@@ -357,26 +356,6 @@ def vit_l_32(pretrained: bool = False, progress: bool = True, **kwargs: Any) ->
     )
 
 
-def vit_h_14(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VisionTransformer:
-    """
-    Constructs a vit_h_14 architecture from
-    `"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
-
-    NOTE: Pretrained weights are not available for this model.
-    """
-    return _vision_transformer(
-        arch="vit_h_14",
-        patch_size=14,
-        num_layers=32,
-        num_heads=16,
-        hidden_dim=1280,
-        mlp_dim=5120,
-        pretrained=pretrained,
-        progress=progress,
-        **kwargs,
-    )
-
-
 def interpolate_embeddings(
     image_size: int,
     patch_size: int,
diff --git a/torchvision/prototype/models/vision_transformer.py b/torchvision/prototype/models/vision_transformer.py
index 72330fd1191..3f256842429 100644
--- a/torchvision/prototype/models/vision_transformer.py
+++ b/torchvision/prototype/models/vision_transformer.py
@@ -19,12 +19,10 @@
     "ViT_B_32_Weights",
     "ViT_L_16_Weights",
     "ViT_L_32_Weights",
-    "ViT_H_14_Weights",
     "vit_b_16",
     "vit_b_32",
     "vit_l_16",
     "vit_l_32",
-    "vit_h_14",
 ]
 
 
@@ -105,11 +103,6 @@ class ViT_L_32_Weights(WeightsEnum):
     default = ImageNet1K_V1
 
 
-class ViT_H_14_Weights(WeightsEnum):
-    # Weights are not available yet.
-    pass
-
-
 def _vision_transformer(
     patch_size: int,
     num_layers: int,
@@ -203,19 +196,3 @@ def vit_l_32(*, weights: Optional[ViT_L_32_Weights] = None, progress: bool = Tru
         progress=progress,
         **kwargs,
     )
-
-
-@handle_legacy_interface(weights=("pretrained", None))
-def vit_h_14(*, weights: Optional[ViT_H_14_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
-    weights = ViT_H_14_Weights.verify(weights)
-
-    return _vision_transformer(
-        patch_size=14,
-        num_layers=32,
-        num_heads=16,
-        hidden_dim=1280,
-        mlp_dim=5120,
-        weights=weights,
-        progress=progress,
-        **kwargs,
-    )
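
Note that the removed vit_h_14 builders were thin wrappers around the generic VisionTransformer class with a fixed set of hyper-parameters (patch_size=14, num_layers=32, num_heads=16, hidden_dim=1280, mlp_dim=5120). If that configuration is still needed after this change, it can be instantiated by hand. The following is a minimal sketch only, assuming the public VisionTransformer class in torchvision.models.vision_transformer accepts image_size, patch_size, num_layers, num_heads, hidden_dim and mlp_dim keyword arguments; it is not part of this diff.

# Sketch only: the hyper-parameters are copied from the deleted vit_h_14 builder
# above; the constructor arguments are assumed to match the public
# VisionTransformer class exposed by torchvision.models.vision_transformer.
import torch
from torchvision.models.vision_transformer import VisionTransformer

vit_h_14 = VisionTransformer(
    image_size=224,  # assumption: 224 is the default image size used by the removed builder
    patch_size=14,
    num_layers=32,
    num_heads=16,
    hidden_dim=1280,
    mlp_dim=5120,
)

# Weights are randomly initialised; as the removed docstring notes, no
# pretrained checkpoint is available for this configuration.
logits = vit_h_14(torch.rand(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])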