Removing hardcoded interpolation and sizes from the scripts.

datumbox · datumbox · commit 9ee69c4202cd · 2021-10-21T11:04:41.000+01:00
diff --git a/references/classification/README.md b/references/classification/README.md
@@ -31,6 +31,17 @@ Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note
 that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch
 normalization and thus are trained with the default parameters.
 
+### Inception V3
+
+The weights of the Inception V3 model are ported from the original paper rather than trained from scratch.
+
+Since it expects tensors with a size of N x 3 x 299 x 299, to validate the model use the following command:
+
+```
+torchrun --nproc_per_node=8 train.py --model inception_v3\
+      --val-resize-size 342 --val-crop-size 299 --train-crop-size 299 --test-only --pretrained
+```
+
 ### ResNext-50 32x4d
 ```
 torchrun --nproc_per_node=8 train.py\
@@ -79,6 +90,25 @@ The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](ht
 
 The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564).
 
+All models were trained using bicubic interpolation and each has custom crop and resize sizes. To validate the models, use the following commands:
+```
+torchrun --nproc_per_node=8 train.py --model efficientnet_b0 --interpolation bicubic\
+     --val-resize-size 256 --val-crop-size 224 --train-crop-size 224 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b1 --interpolation bicubic\
+      --val-resize-size 256 --val-crop-size 240 --train-crop-size 240 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b2 --interpolation bicubic\
+      --val-resize-size 288 --val-crop-size 288 --train-crop-size 288 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b3 --interpolation bicubic\
+      --val-resize-size 320 --val-crop-size 300 --train-crop-size 300 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b4 --interpolation bicubic\
+      --val-resize-size 384 --val-crop-size 380 --train-crop-size 380 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b5 --interpolation bicubic\
+      --val-resize-size 456 --val-crop-size 456 --train-crop-size 456 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b6 --interpolation bicubic\
+      --val-resize-size 528 --val-crop-size 528 --train-crop-size 528 --test-only --pretrained
+torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --interpolation bicubic\
+      --val-resize-size 600 --val-crop-size 600 --train-crop-size 600 --test-only --pretrained
+```
 
 ### RegNet
 
@@ -181,3 +211,8 @@ For post training quant, device is set to CPU. For training, the device is set t
 ```
 python train_quantization.py --device='cpu' --test-only --backend='<backend>' --model='<model_name>'
 ```
+
+For inception_v3 you need to pass the following extra parameters:
+```
+--val-resize-size 342 --val-crop-size 299 --train-crop-size 299
+```
diff --git a/references/classification/train.py b/references/classification/train.py
@@ -107,26 +107,8 @@ def _get_cache_path(filepath):
 def load_data(traindir, valdir, args):
     # Data loading code
     print("Loading data")
-    val_resize_size, val_crop_size, train_crop_size = 256, 224, 224
+    val_resize_size, val_crop_size, train_crop_size = args.val_resize_size, args.val_crop_size, args.train_crop_size
     interpolation = InterpolationMode(args.interpolation)
-    if args.model == "inception_v3":
-        val_resize_size, val_crop_size, train_crop_size = 342, 299, 299
-    elif args.model == "resnet50":
-        val_resize_size, val_crop_size, train_crop_size = 256, 224, 176
-    elif args.model.startswith("efficientnet_"):
-        sizes = {
-            "b0": (256, 224, 224),
-            "b1": (256, 240, 240),
-            "b2": (288, 288, 288),
-            "b3": (320, 300, 300),
-            "b4": (384, 380, 380),
-            "b5": (456, 456, 456),
-            "b6": (528, 528, 528),
-            "b7": (600, 600, 600),
-        }
-        e_type = args.model.replace("efficientnet_", "")
-        val_resize_size, val_crop_size, train_crop_size = sizes[e_type]
-        interpolation = InterpolationMode.BICUBIC
 
     print("Loading training data")
     st = time.time()
@@ -458,7 +440,13 @@ def get_args_parser(add_help=True):
     parser.add_argument(
         "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
     )
-    parser.add_argument("--interpolation", default="bilinear", help="the default interpolation (default: bilinear)")
+    parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)")
+    parser.add_argument("--val-resize-size", default=256, type=int,
+                        help="the resize size used for validation (default: 256)")
+    parser.add_argument("--val-crop-size", default=224, type=int,
+                        help="the central crop size used for validation (default: 224)")
+    parser.add_argument("--train-crop-size", default=224, type=int,
+                        help="the random crop size used for training (default: 224)")
 
     return parser
 
diff --git a/references/classification/train_quantization.py b/references/classification/train_quantization.py
@@ -236,6 +236,14 @@ def get_args_parser(add_help=True):
     parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
     parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training")
 
+    parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)")
+    parser.add_argument("--val-resize-size", default=256, type=int,
+                        help="the resize size used for validation (default: 256)")
+    parser.add_argument("--val-crop-size", default=224, type=int,
+                        help="the central crop size used for validation (default: 224)")
+    parser.add_argument("--train-crop-size", default=224, type=int,
+                        help="the random crop size used for training (default: 224)")
+
     return parser