@@ -1633,42 +1633,49 @@ def _dinov3_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
16331633
16341634 # DINOv3 weights are under a specific license with redistribution terms, please see
16351635 # https://github.com/facebookresearch/dinov3/blob/main/LICENSE.md
1636- 'vit_small_patch16_dinov3.lvdm_1689m ' : _dinov3_cfg (
1637- # hf_hub_id='timm/',
1636+ 'vit_small_patch16_dinov3.lvd_1689m ' : _dinov3_cfg (
1637+ hf_hub_id = 'timm/' ,
16381638 ),
1639- 'vit_small_patch16_dinov3_qkvb.lvdm_1689m ' : _dinov3_cfg (
1640- # hf_hub_id='timm/',
1639+ 'vit_small_patch16_dinov3_qkvb.lvd_1689m ' : _dinov3_cfg (
1640+ hf_hub_id = 'timm/' ,
16411641 ),
1642- 'vit_small_plus_patch16_dinov3.lvdm_1689m ' : _dinov3_cfg (
1643- # hf_hub_id='timm/',
1642+ 'vit_small_plus_patch16_dinov3.lvd_1689m ' : _dinov3_cfg (
1643+ hf_hub_id = 'timm/' ,
16441644 ),
1645- 'vit_small_plus_patch16_dinov3_qkvb.lvdm_1689m ' : _dinov3_cfg (
1646- # hf_hub_id='timm/',
1645+ 'vit_small_plus_patch16_dinov3_qkvb.lvd_1689m ' : _dinov3_cfg (
1646+ hf_hub_id = 'timm/' ,
16471647 ),
1648- 'vit_base_patch16_dinov3.lvdm_1689m ' : _dinov3_cfg (
1649- # hf_hub_id='timm/',
1648+ 'vit_base_patch16_dinov3.lvd_1689m ' : _dinov3_cfg (
1649+ hf_hub_id = 'timm/' ,
16501650 ),
1651- 'vit_base_patch16_dinov3_qkvb.lvdm_1689m ' : _dinov3_cfg (
1652- # hf_hub_id='timm/',
1651+ 'vit_base_patch16_dinov3_qkvb.lvd_1689m ' : _dinov3_cfg (
1652+ hf_hub_id = 'timm/' ,
16531653 ),
1654- 'vit_large_patch16_dinov3.lvdm_1689m ' : _dinov3_cfg (
1655- # hf_hub_id='timm/',
1654+ 'vit_large_patch16_dinov3.lvd_1689m ' : _dinov3_cfg (
1655+ hf_hub_id = 'timm/' ,
16561656 ),
1657- 'vit_large_patch16_dinov3_qkvb.lvdm_1689m ' : _dinov3_cfg (
1658- # hf_hub_id='timm/',
1657+ 'vit_large_patch16_dinov3_qkvb.lvd_1689m ' : _dinov3_cfg (
1658+ hf_hub_id = 'timm/' ,
16591659 ),
16601660 'vit_large_patch16_dinov3.sat_493m' : _dinov3_cfg (
1661- # hf_hub_id='timm/',
1661+ hf_hub_id = 'timm/' ,
1662+ mean = (0.430 , 0.411 , 0.296 ), std = (0.213 , 0.156 , 0.143 ),
1663+ ),
1664+ 'vit_large_patch16_dinov3_qkvb.sat_493m' : _dinov3_cfg (
1665+ hf_hub_id = 'timm/' ,
16621666 mean = (0.430 , 0.411 , 0.296 ), std = (0.213 , 0.156 , 0.143 ),
16631667 ),
1664- 'vit_huge_plus_patch16_dinov3.lvdm_1689m' : _dinov3_cfg (
1665- # hf_hub_id='timm/',
1668+ 'vit_huge_plus_patch16_dinov3.lvd_1689m' : _dinov3_cfg (
1669+ hf_hub_id = 'timm/' ,
1670+ ),
1671+ 'vit_huge_plus_patch16_dinov3_qkvb.lvd_1689m' : _dinov3_cfg (
1672+ hf_hub_id = 'timm/' ,
16661673 ),
1667- 'vit_7b_patch16_dinov3.lvdm_1689m ' : _dinov3_cfg (
1668- # hf_hub_id='timm/',
1674+ 'vit_7b_patch16_dinov3.lvd_1689m ' : _dinov3_cfg (
1675+ hf_hub_id = 'timm/' ,
16691676 ),
16701677 'vit_7b_patch16_dinov3.sat_493m' : _dinov3_cfg (
1671- # hf_hub_id='timm/',
1678+ hf_hub_id = 'timm/' ,
16721679 mean = (0.430 , 0.411 , 0.296 ), std = (0.213 , 0.156 , 0.143 ),
16731680 ),
16741681
@@ -2614,6 +2621,7 @@ def vit_large_patch16_rope_mixed_ape_224(pretrained: bool = False, **kwargs) ->
26142621
26152622@register_model
26162623def vit_small_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
2624+ """DINOv3 S/16 https://arxiv.org/abs/2508.10104"""
26172625 model_args = dict (
26182626 patch_size = 16 ,
26192627 dynamic_img_size = True ,
@@ -2638,6 +2646,7 @@ def vit_small_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
26382646
26392647@register_model
26402648def vit_small_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
2649+ """DINOv3 S/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104"""
26412650 model_args = dict (
26422651 patch_size = 16 ,
26432652 dynamic_img_size = True ,
@@ -2662,6 +2671,7 @@ def vit_small_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
26622671
26632672@register_model
26642673def vit_small_plus_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
2674+ """DINOv3 S/16 Plus https://arxiv.org/abs/2508.10104"""
26652675 model_args = dict (
26662676 patch_size = 16 ,
26672677 dynamic_img_size = True ,
@@ -2688,6 +2698,7 @@ def vit_small_plus_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
26882698
26892699@register_model
26902700def vit_small_plus_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
2701+ """DINOv3 S/16 Plus w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104"""
26912702 model_args = dict (
26922703 patch_size = 16 ,
26932704 dynamic_img_size = True ,
@@ -2714,6 +2725,7 @@ def vit_small_plus_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Ev
27142725
27152726@register_model
27162727def vit_base_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
2728+ """DINOv3 B/16 https://arxiv.org/abs/2508.10104"""
27172729 model_args = dict (
27182730 patch_size = 16 ,
27192731 dynamic_img_size = True ,
@@ -2738,7 +2750,7 @@ def vit_base_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
27382750
27392751@register_model
27402752def vit_base_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
2741- # DINOv3 Base variant w/ qkv_bias enabled (zero'd in weights)
2753+ """ DINOv3 B/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104"""
27422754 model_args = dict (
27432755 patch_size = 16 ,
27442756 dynamic_img_size = True ,
@@ -2763,6 +2775,7 @@ def vit_base_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
27632775
27642776@register_model
27652777def vit_large_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
2778+ """DINOv3 L/16 https://arxiv.org/abs/2508.10104"""
27662779 model_args = dict (
27672780 patch_size = 16 ,
27682781 dynamic_img_size = True ,
@@ -2787,10 +2800,11 @@ def vit_large_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
27872800
27882801@register_model
27892802def vit_large_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
2803+ """DINOv3 L/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104"""
27902804 model_args = dict (
27912805 patch_size = 16 ,
27922806 dynamic_img_size = True ,
2793- embed_dim = 768 ,
2807+ embed_dim = 1024 ,
27942808 depth = 24 ,
27952809 num_heads = 16 ,
27962810 qkv_bias = True ,
@@ -2811,6 +2825,7 @@ def vit_large_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
28112825
28122826@register_model
28132827def vit_huge_plus_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
2828+ """DINOv3 H/16 Plus https://arxiv.org/abs/2508.10104"""
28142829 model_args = dict (
28152830 patch_size = 16 ,
28162831 dynamic_img_size = True ,
@@ -2836,8 +2851,36 @@ def vit_huge_plus_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
28362851 return model
28372852
28382853
2854+ @register_model
2855+ def vit_huge_plus_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
2856+ """DINOv3 H/16 Plus w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104"""
2857+ model_args = dict (
2858+ patch_size = 16 ,
2859+ dynamic_img_size = True ,
2860+ embed_dim = 1280 ,
2861+ depth = 32 ,
2862+ num_heads = 20 ,
2863+ qkv_bias = True ,
2864+ init_values = 1.0e-5 , # layer-scale
2865+ rope_type = 'dinov3' ,
2866+ rope_temperature = 100 ,
2867+ use_rot_pos_emb = True ,
2868+ use_abs_pos_emb = False ,
2869+ rope_rotate_half = True ,
2870+ swiglu_mlp = True ,
2871+ swiglu_align_to = 8 ,
2872+ #rope_rescale_coords=2, # haven't added to interface
2873+ num_reg_tokens = 4 ,
2874+ use_fc_norm = False ,
2875+ norm_layer = partial (LayerNorm , eps = 1e-5 ),
2876+ )
2877+
2878+ model = _create_eva ('vit_huge_plus_patch16_dinov3_qkvb' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2879+ return model
2880+
28392881@register_model
28402882def vit_7b_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
2883+ """DINOv3 7B/16 https://arxiv.org/abs/2508.10104"""
28412884 model_args = dict (
28422885 patch_size = 16 ,
28432886 dynamic_img_size = True ,
0 commit comments