diff --git a/docs/source/conf.py b/docs/source/conf.py
index 231d3cad416..2bba57bb580 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -375,7 +375,16 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
         lines.append("")
 
 
-def generate_weights_table(module, table_name, metrics, dataset, include_patterns=None, exclude_patterns=None):
+def generate_weights_table(
+    module,
+    table_name,
+    metrics,
+    dataset,
+    include_patterns=None,
+    exclude_patterns=None,
+    table_description="",
+    title_character="-",
+):
     weights_endswith = "_QuantizedWeights" if module.__name__.split(".")[-1] == "quantization" else "_Weights"
     weight_enums = [getattr(module, name) for name in dir(module) if name.endswith(weights_endswith)]
     weights = [w for weight_enum in weight_enums for w in weight_enum]
@@ -403,27 +412,39 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern
     generated_dir = Path("generated")
     generated_dir.mkdir(exist_ok=True)
     with open(generated_dir / f"{table_name}_table.rst", "w+") as table_file:
+        table_file.write(
+            f"Table of all available {table_name.replace('_',' ').title()} Weights \n{(32 + len(table_name))*title_character}\n"
+        )
+        table_file.write(f"{table_description}\n\n")
         table_file.write(".. rst-class:: table-weights\n")  # Custom CSS class, see custom_torchvision.css
-        table_file.write(".. table::\n")
+        table_file.write(f".. table:: {table_name}\n")
         table_file.write(f"    :widths: 100 {'20 ' * len(metrics_names)} 20 10\n\n")
         table_file.write(f"{textwrap.indent(table, ' ' * 4)}\n\n")
 
 
 generate_weights_table(
-    module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet-1K"
+    module=M,
+    table_name="classification",
+    metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")],
+    dataset="ImageNet-1K",
+    table_description="Accuracies are reported on ImageNet-1K using single crops:",
 )
 generate_weights_table(
     module=M.quantization,
-    table_name="classification_quant",
+    table_name="quantized_classification",
     metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")],
     dataset="ImageNet-1K",
+    table_description="Accuracies are reported on ImageNet-1K using single crops:",
+    title_character="^",
 )
 generate_weights_table(
     module=M.detection,
-    table_name="detection",
+    table_name="object_detection",
     metrics=[("box_map", "Box MAP")],
     exclude_patterns=["Mask", "Keypoint"],
     dataset="COCO-val2017",
+    table_description="Box MAPs are reported on COCO val2017:",
+    title_character="^",
 )
 generate_weights_table(
     module=M.detection,
@@ -431,22 +452,31 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern
     metrics=[("box_map", "Box MAP"), ("mask_map", "Mask MAP")],
     dataset="COCO-val2017",
     include_patterns=["Mask"],
+    table_description="Box and Mask MAPs are reported on COCO val2017:",
+    title_character="^",
 )
 generate_weights_table(
     module=M.detection,
-    table_name="detection_keypoint",
+    table_name="keypoint_detection",
     metrics=[("box_map", "Box MAP"), ("kp_map", "Keypoint MAP")],
     dataset="COCO-val2017",
     include_patterns=["Keypoint"],
+    table_description="Box and Keypoint MAPs are reported on COCO val2017:",
+    title_character="^",
 )
 generate_weights_table(
     module=M.segmentation,
-    table_name="segmentation",
+    table_name="semantic_segmentation",
     metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")],
     dataset="COCO-val2017-VOC-labels",
+    table_description="All models are evaluated on a subset of COCO val2017, on the 20 categories that are present in the Pascal VOC dataset:",
 )
 generate_weights_table(
-    module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="Kinetics-400"
+    module=M.video,
+    table_name="video_classification",
+    metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")],
+    dataset="Kinetics-400",
+    table_description="Accuracies are reported on Kinetics-400 using single crops for clip length 16:",
 )
 
 
diff --git a/docs/source/models.rst b/docs/source/models.rst
index 410e0e42e99..887722bf1ad 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -251,10 +251,6 @@ Here is an example of how to use the pre-trained image classification models:
 
 The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``.
 
-Table of all available classification weights
----------------------------------------------
-
-Accuracies are reported on ImageNet-1K using single crops:
 
 .. include:: generated/classification_table.rst
 
@@ -309,12 +305,8 @@ Here is an example of how to use the pre-trained quantized image classification
 
 The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``.
 
-Table of all available quantized classification weights
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Accuracies are reported on ImageNet-1K using single crops:
 
-.. include:: generated/classification_quant_table.rst
+.. include:: generated/quantized_classification_table.rst
 
 Semantic Segmentation
 =====================
@@ -367,12 +359,8 @@ The classes of the pre-trained model outputs can be found at ``weights.meta["cat
 
 The output format of the models is illustrated in :ref:`semantic_seg_output`.
 
-Table of all available semantic segmentation weights
-----------------------------------------------------
-
-All models are evaluated a subset of COCO val2017, on the 20 categories that are present in the Pascal VOC dataset:
 
-.. include:: generated/segmentation_table.rst
+.. include:: generated/semantic_segmentation_table.rst
 
 
 .. _object_det_inst_seg_pers_keypoint_det:
@@ -442,12 +430,8 @@ Here is an example of how to use the pre-trained object detection models:
 
 The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``. For details
 on how to plot the bounding boxes of the models, you may refer to :ref:`instance_seg_output`.
 
-Table of all available Object detection weights
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Box MAPs are reported on COCO val2017:
 
-.. include:: generated/detection_table.rst
+.. include:: generated/object_detection_table.rst
 
 Instance Segmentation
@@ -468,10 +452,6 @@ weights:
 
 For details on how to plot the masks of the models, you may refer to :ref:`instance_seg_output`.
 
-Table of all available Instance segmentation weights
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Box and Mask MAPs are reported on COCO val2017:
 
 .. include:: generated/instance_segmentation_table.rst
 
@@ -493,12 +473,8 @@ pre-trained weights:
 
 The classes of the pre-trained model outputs can be found at ``weights.meta["keypoint_names"]``. For details
 on how to plot the bounding boxes of the models, you may refer to :ref:`keypoint_output`.
 
-Table of all available Keypoint detection weights
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Box and Keypoint MAPs are reported on COCO val2017:
 
-.. include:: generated/detection_keypoint_table.rst
+.. include:: generated/keypoint_detection_table.rst
 
 Video Classification
@@ -551,12 +527,8 @@ Here is an example of how to use the pre-trained video classification models:
 
 The classes of the pre-trained model outputs can be found at ``weights.meta["categories"]``.
 
-Table of all available video classification weights
----------------------------------------------------
-
-Accuracies are reported on Kinetics-400 using single crops for clip length 16:
 
-.. include:: generated/video_table.rst
+.. include:: generated/video_classification_table.rst
 
 Optical Flow
 ============
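
Note: the patch above moves each "Table of all available ... weights" heading and its one-line description out of models.rst and into the generated ``*_table.rst`` files, so every ``.. include::`` now pulls in a self-contained section. The underline arithmetic in the new header write works out as follows: ``len("Table of all available ")`` is 23 and ``len(" Weights ")`` is 9 (the f-string leaves a trailing space before the newline), and since ``str.replace('_', ' ')`` and ``str.title()`` both preserve string length, ``32 + len(table_name)`` always matches the generated title exactly. Below is a minimal standalone sketch (not part of the patch) of the header block that the patched ``generate_weights_table`` writes, using the arguments of the classification call; the table body and ``:widths:`` line are omitted:

    # Sketch only, not part of the patch: reproduces the header block that
    # generate_weights_table() now writes at the top of
    # generated/classification_table.rst.
    table_name = "classification"
    title_character = "-"
    table_description = "Accuracies are reported on ImageNet-1K using single crops:"

    header = (
        f"Table of all available {table_name.replace('_',' ').title()} Weights \n"
        f"{(32 + len(table_name))*title_character}\n"
        f"{table_description}\n\n"
    )
    print(header)
    # Title and underline are both 46 characters wide (32 + 14), so Sphinx does
    # not warn about a too-short title underline:
    #
    # Table of all available Classification Weights
    # ----------------------------------------------
    # Accuracies are reported on ImageNet-1K using single crops: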