Skip to content

Commit 1e83c57

Browse files
YosuaMichael authored and facebook-github-bot committed
[fbsync] Document all remaining pre-trained weights (#6039)
Summary: * Adding docs for quantized models. * Adding docs for video models. * Adding docs for segmentation models. * Adding docs for optical flow models. * Adding docs for detection models. * Fix typo. * Make changes from code-review. Reviewed By: NicolasHug Differential Revision: D36760944 fbshipit-source-id: ed369c883495d532aacf4de087e65d07b8660ba0
1 parent 5a5d24b commit 1e83c57

File tree

20 files changed

+79
-19
lines changed

20 files changed

+79
-19
lines changed

docs/source/conf.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -345,9 +345,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
345345
metrics = meta.pop("metrics", {})
346346
meta_with_metrics = dict(meta, **metrics)
347347

348-
custom_docs = meta_with_metrics.pop("_docs", None) # Custom per-Weights docs
349-
if custom_docs is not None:
350-
lines += [custom_docs]
348+
lines += [meta_with_metrics.pop("_docs")]
351349

352350
if field == obj.DEFAULT:
353351
lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]

test/test_extended_models.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,8 +95,8 @@ def test_schema_meta_validation(model_fn):
9595
# mandatory fields for each computer vision task
9696
classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
9797
defaults = {
98-
"all": {"metrics", "min_size", "num_params", "recipe"},
99-
"models": classification_fields | {"_docs"},
98+
"all": {"metrics", "min_size", "num_params", "recipe", "_docs"},
99+
"models": classification_fields,
100100
"detection": {"categories", ("metrics", "box_map")},
101101
"quantization": classification_fields | {"backend", "unquantized"},
102102
"segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},

torchvision/models/detection/faster_rcnn.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -386,6 +386,7 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum):
386386
"metrics": {
387387
"box_map": 37.0,
388388
},
389+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
389390
},
390391
)
391392
DEFAULT = COCO_V1
@@ -402,6 +403,7 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
402403
"metrics": {
403404
"box_map": 46.7,
404405
},
406+
"_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
405407
},
406408
)
407409
DEFAULT = COCO_V1
@@ -418,6 +420,7 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
418420
"metrics": {
419421
"box_map": 32.8,
420422
},
423+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
421424
},
422425
)
423426
DEFAULT = COCO_V1
@@ -434,6 +437,7 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
434437
"metrics": {
435438
"box_map": 22.8,
436439
},
440+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
437441
},
438442
)
439443
DEFAULT = COCO_V1
@@ -454,7 +458,7 @@ def fasterrcnn_resnet50_fpn(
454458
) -> FasterRCNN:
455459
"""
456460
Faster R-CNN model with a ResNet-50-FPN backbone from the `Faster R-CNN: Towards Real-Time Object
457-
Detection with Region Proposal Networks <https://arxiv.org/abs/1703.06870>`__
461+
Detection with Region Proposal Networks <https://arxiv.org/abs/1506.01497>`__
458462
paper.
459463
460464
The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each

torchvision/models/detection/fcos.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -661,6 +661,7 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum):
661661
"metrics": {
662662
"box_map": 39.2,
663663
},
664+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
664665
},
665666
)
666667
DEFAULT = COCO_V1

torchvision/models/detection/keypoint_rcnn.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
326326
"box_map": 50.6,
327327
"kp_map": 61.1,
328328
},
329+
"_docs": """
330+
These weights were produced by following a similar training recipe as on the paper but use a checkpoint
331+
from an early epoch.
332+
""",
329333
},
330334
)
331335
COCO_V1 = Weights(
@@ -339,6 +343,7 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
339343
"box_map": 54.6,
340344
"kp_map": 65.0,
341345
},
346+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
342347
},
343348
)
344349
DEFAULT = COCO_V1

torchvision/models/detection/mask_rcnn.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -368,6 +368,7 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
368368
"box_map": 37.9,
369369
"mask_map": 34.6,
370370
},
371+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
371372
},
372373
)
373374
DEFAULT = COCO_V1
@@ -385,6 +386,7 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
385386
"box_map": 47.4,
386387
"mask_map": 41.8,
387388
},
389+
"_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
388390
},
389391
)
390392
DEFAULT = COCO_V1

torchvision/models/detection/retinanet.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -690,6 +690,7 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
690690
"metrics": {
691691
"box_map": 36.4,
692692
},
693+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
693694
},
694695
)
695696
DEFAULT = COCO_V1
@@ -706,6 +707,7 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum):
706707
"metrics": {
707708
"box_map": 41.5,
708709
},
710+
"_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
709711
},
710712
)
711713
DEFAULT = COCO_V1

torchvision/models/detection/ssd.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ class SSD300_VGG16_Weights(WeightsEnum):
3737
"metrics": {
3838
"box_map": 25.1,
3939
},
40+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
4041
},
4142
)
4243
DEFAULT = COCO_V1

torchvision/models/detection/ssdlite.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,7 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
196196
"metrics": {
197197
"box_map": 21.3,
198198
},
199+
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
199200
},
200201
)
201202
DEFAULT = COCO_V1

torchvision/models/optical_flow/raft.py

Lines changed: 22 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -518,7 +518,7 @@ def forward(self, image1, image2, num_flow_updates: int = 12):
518518

519519
class Raft_Large_Weights(WeightsEnum):
520520
C_T_V1 = Weights(
521-
# Chairs + Things, ported from original paper repo (raft-things.pth)
521+
# Weights ported from https://github.com/princeton-vl/RAFT
522522
url="https://download.pytorch.org/models/raft_large_C_T_V1-22a6c225.pth",
523523
transforms=OpticalFlow,
524524
meta={
@@ -531,11 +531,11 @@ class Raft_Large_Weights(WeightsEnum):
531531
"kitti_train_per_image_epe": 5.0172,
532532
"kitti_train_fl_all": 17.4506,
533533
},
534+
"_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""",
534535
},
535536
)
536537

537538
C_T_V2 = Weights(
538-
# Chairs + Things
539539
url="https://download.pytorch.org/models/raft_large_C_T_V2-1bb1363a.pth",
540540
transforms=OpticalFlow,
541541
meta={
@@ -548,11 +548,12 @@ class Raft_Large_Weights(WeightsEnum):
548548
"kitti_train_per_image_epe": 4.5118,
549549
"kitti_train_fl_all": 16.0679,
550550
},
551+
"_docs": """These weights were trained from scratch on Chairs + Things.""",
551552
},
552553
)
553554

554555
C_T_SKHT_V1 = Weights(
555-
# Chairs + Things + Sintel fine-tuning, ported from original paper repo (raft-sintel.pth)
556+
# Weights ported from https://github.com/princeton-vl/RAFT
556557
url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V1-0b8c9e55.pth",
557558
transforms=OpticalFlow,
558559
meta={
@@ -563,13 +564,14 @@ class Raft_Large_Weights(WeightsEnum):
563564
"sintel_test_cleanpass_epe": 1.94,
564565
"sintel_test_finalpass_epe": 3.18,
565566
},
567+
"_docs": """
568+
These weights were ported from the original paper. They are trained on Chairs + Things and fine-tuned on
569+
Sintel (C+T+S+K+H).
570+
""",
566571
},
567572
)
568573

569574
C_T_SKHT_V2 = Weights(
570-
# Chairs + Things + Sintel fine-tuning, i.e.:
571-
# Chairs + Things + (Sintel + Kitti + HD1K + Things_clean)
572-
# Corresponds to the C+T+S+K+H on paper with fine-tuning on Sintel
573575
url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V2-ff5fadd5.pth",
574576
transforms=OpticalFlow,
575577
meta={
@@ -580,11 +582,14 @@ class Raft_Large_Weights(WeightsEnum):
580582
"sintel_test_cleanpass_epe": 1.819,
581583
"sintel_test_finalpass_epe": 3.067,
582584
},
585+
"_docs": """
586+
These weights were trained from scratch on Chairs + Things and fine-tuned on Sintel (C+T+S+K+H).
587+
""",
583588
},
584589
)
585590

586591
C_T_SKHT_K_V1 = Weights(
587-
# Chairs + Things + Sintel fine-tuning + Kitti fine-tuning, ported from the original repo (sintel-kitti.pth)
592+
# Weights ported from https://github.com/princeton-vl/RAFT
588593
url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V1-4a6a5039.pth",
589594
transforms=OpticalFlow,
590595
meta={
@@ -594,14 +599,14 @@ class Raft_Large_Weights(WeightsEnum):
594599
"metrics": {
595600
"kitti_test_fl_all": 5.10,
596601
},
602+
"_docs": """
603+
These weights were ported from the original paper. They are trained on Chairs + Things, fine-tuned on
604+
Sintel and then on Kitti.
605+
""",
597606
},
598607
)
599608

600609
C_T_SKHT_K_V2 = Weights(
601-
# Chairs + Things + Sintel fine-tuning + Kitti fine-tuning i.e.:
602-
# Chairs + Things + (Sintel + Kitti + HD1K + Things_clean) + Kitti
603-
# Same as CT_SKHT with extra fine-tuning on Kitti
604-
# Corresponds to the C+T+S+K+H on paper with fine-tuning on Sintel and then on Kitti
605610
url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V2-b5c70766.pth",
606611
transforms=OpticalFlow,
607612
meta={
@@ -611,6 +616,9 @@ class Raft_Large_Weights(WeightsEnum):
611616
"metrics": {
612617
"kitti_test_fl_all": 5.19,
613618
},
619+
"_docs": """
620+
These weights were trained from scratch on Chairs + Things, fine-tuned on Sintel and then on Kitti.
621+
""",
614622
},
615623
)
616624

@@ -619,7 +627,7 @@ class Raft_Large_Weights(WeightsEnum):
619627

620628
class Raft_Small_Weights(WeightsEnum):
621629
C_T_V1 = Weights(
622-
# Chairs + Things, ported from original paper repo (raft-small.pth)
630+
# Weights ported from https://github.com/princeton-vl/RAFT
623631
url="https://download.pytorch.org/models/raft_small_C_T_V1-ad48884c.pth",
624632
transforms=OpticalFlow,
625633
meta={
@@ -632,10 +640,10 @@ class Raft_Small_Weights(WeightsEnum):
632640
"kitti_train_per_image_epe": 7.6557,
633641
"kitti_train_fl_all": 25.2801,
634642
},
643+
"_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""",
635644
},
636645
)
637646
C_T_V2 = Weights(
638-
# Chairs + Things
639647
url="https://download.pytorch.org/models/raft_small_C_T_V2-01064c6d.pth",
640648
transforms=OpticalFlow,
641649
meta={
@@ -648,6 +656,7 @@ class Raft_Small_Weights(WeightsEnum):
648656
"kitti_train_per_image_epe": 7.5978,
649657
"kitti_train_fl_all": 25.2369,
650658
},
659+
"_docs": """These weights were trained from scratch on Chairs + Things.""",
651660
},
652661
)
653662

torchvision/models/quantization/googlenet.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,10 @@ class GoogLeNet_QuantizedWeights(WeightsEnum):
121121
"acc@1": 69.826,
122122
"acc@5": 89.404,
123123
},
124+
"_docs": """
125+
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
126+
weights listed below.
127+
""",
124128
},
125129
)
126130
DEFAULT = IMAGENET1K_FBGEMM_V1

torchvision/models/quantization/inception.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,10 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
187187
"acc@1": 77.176,
188188
"acc@5": 93.354,
189189
},
190+
"_docs": """
191+
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
192+
weights listed below.
193+
""",
190194
},
191195
)
192196
DEFAULT = IMAGENET1K_FBGEMM_V1

torchvision/models/quantization/mobilenetv2.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,10 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
7979
"acc@1": 71.658,
8080
"acc@5": 90.150,
8181
},
82+
"_docs": """
83+
These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
84+
weights listed below.
85+
""",
8286
},
8387
)
8488
DEFAULT = IMAGENET1K_QNNPACK_V1

torchvision/models/quantization/mobilenetv3.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,10 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
173173
"acc@1": 73.004,
174174
"acc@5": 90.858,
175175
},
176+
"_docs": """
177+
These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
178+
weights listed below.
179+
""",
176180
},
177181
)
178182
DEFAULT = IMAGENET1K_QNNPACK_V1

torchvision/models/quantization/resnet.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,10 @@ def _resnet(
154154
"categories": _IMAGENET_CATEGORIES,
155155
"backend": "fbgemm",
156156
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
157+
"_docs": """
158+
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
159+
weights listed below.
160+
""",
157161
}
158162

159163

torchvision/models/quantization/shufflenetv2.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,10 @@ def _shufflenetv2(
118118
"categories": _IMAGENET_CATEGORIES,
119119
"backend": "fbgemm",
120120
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
121+
"_docs": """
122+
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
123+
weights listed below.
124+
""",
121125
}
122126

123127

torchvision/models/segmentation/deeplabv3.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,10 @@ def _deeplabv3_resnet(
131131
_COMMON_META = {
132132
"categories": _VOC_CATEGORIES,
133133
"min_size": (1, 1),
134+
"_docs": """
135+
These weights were trained on a subset of COCO, using only the 20 categories that are present in the Pascal VOC
136+
dataset.
137+
""",
134138
}
135139

136140

torchvision/models/segmentation/fcn.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,10 @@ def __init__(self, in_channels: int, channels: int) -> None:
5050
_COMMON_META = {
5151
"categories": _VOC_CATEGORIES,
5252
"min_size": (1, 1),
53+
"_docs": """
54+
These weights were trained on a subset of COCO, using only the 20 categories that are present in the Pascal VOC
55+
dataset.
56+
""",
5357
}
5458

5559

torchvision/models/segmentation/lraspp.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,10 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum):
106106
"miou": 57.9,
107107
"pixel_acc": 91.2,
108108
},
109+
"_docs": """
110+
These weights were trained on a subset of COCO, using only the 20 categories that are present in the
111+
Pascal VOC dataset.
112+
""",
109113
},
110114
)
111115
DEFAULT = COCO_WITH_VOC_LABELS_V1

torchvision/models/video/resnet.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,7 @@ def _video_resnet(
312312
"min_size": (1, 1),
313313
"categories": _KINETICS400_CATEGORIES,
314314
"recipe": "https://github.com/pytorch/vision/tree/main/references/video_classification",
315+
"_docs": """These weights reproduce closely the accuracy of the paper for 16-frame clip inputs.""",
315316
}
316317

317318

0 commit comments

Comments
 (0)