Added tests for rotation, affine and zoom transforms

vfdev-5 · vfdev-5 · commit a7d17ecf7251 · 2022-07-08T16:38:56.000+02:00
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
@@ -73,6 +73,8 @@ class TestSmoke:
         transforms.RandomHorizontalFlip(),
         transforms.Pad(5),
         transforms.RandomZoomOut(),
+        transforms.RandomRotation(degrees=(-45, 45)),
+        transforms.RandomAffine(degrees=(-45, 45)),
     )
     def test_common(self, transform, input):
         transform(input)
diff --git a/torchvision/prototype/transforms/__init__.py b/torchvision/prototype/transforms/__init__.py
@@ -17,6 +17,8 @@
     RandomVerticalFlip,
     Pad,
     RandomZoomOut,
+    RandomRotation,
+    RandomAffine,
 )
 from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
 from ._misc import Identity, Normalize, ToDtype, Lambda
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
@@ -210,18 +210,22 @@ def affine_image_tensor(
     fill: Optional[List[float]] = None,
     center: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    num_channels, height, width = img.shape[-3:]
+    extra_dims = img.shape[:-3]
+    img = img.view(-1, num_channels, height, width)
+
     angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center)
 
     center_f = [0.0, 0.0]
     if center is not None:
-        _, height, width = get_dimensions_image_tensor(img)
         # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
         center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
 
     translate_f = [1.0 * t for t in translate]
     matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear)
 
-    return _FT.affine(img, matrix, interpolation=interpolation.value, fill=fill)
+    output = _FT.affine(img, matrix, interpolation=interpolation.value, fill=fill)
+    return output.view(extra_dims + (num_channels, height, width))
 
 
 def affine_image_pil(
@@ -344,15 +348,15 @@ def affine_bounding_box(
 
 
 def affine_segmentation_mask(
-    img: torch.Tensor,
+    mask: torch.Tensor,
     angle: float,
     translate: List[float],
     scale: float,
     shear: List[float],
     center: Optional[List[float]] = None,
 ) -> torch.Tensor:
     return affine_image_tensor(
-        img,
+        mask,
         angle=angle,
         translate=translate,
         scale=scale,
@@ -423,6 +427,10 @@ def rotate_image_tensor(
     fill: Optional[List[float]] = None,
     center: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    num_channels, height, width = img.shape[-3:]
+    extra_dims = img.shape[:-3]
+    img = img.view(-1, num_channels, height, width)
+
     center_f = [0.0, 0.0]
     if center is not None:
         if expand:
@@ -435,7 +443,8 @@ def rotate_image_tensor(
     # due to current incoherence of rotation angle direction between affine and rotate implementations
     # we need to set -angle.
     matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
-    return _FT.rotate(img, matrix, interpolation=interpolation.value, expand=expand, fill=fill)
+    output = _FT.rotate(img, matrix, interpolation=interpolation.value, expand=expand, fill=fill)
+    return output.view(extra_dims + (num_channels, height, width))
 
 
 def rotate_image_pil(
@@ -518,15 +527,15 @@ def rotate(
 def pad_image_tensor(
     img: torch.Tensor, padding: Union[int, List[int]], fill: Union[int, float] = 0, padding_mode: str = "constant"
 ) -> torch.Tensor:
-    num_masks, height, width = img.shape[-3:]
+    num_channels, height, width = img.shape[-3:]
     extra_dims = img.shape[:-3]
 
     padded_image = _FT.pad(
-        img=img.view(-1, num_masks, height, width), padding=padding, fill=fill, padding_mode=padding_mode
+        img=img.view(-1, num_channels, height, width), padding=padding, fill=fill, padding_mode=padding_mode
     )
 
     new_height, new_width = padded_image.shape[-2:]
-    return padded_image.view(extra_dims + (num_masks, new_height, new_width))
+    return padded_image.view(extra_dims + (num_channels, new_height, new_width))
 
 
 # TODO: This should be removed once pytorch pad supports non-scalar padding values

Original file line number	Diff line number	Diff line change
`@@ -73,6 +73,8 @@ class TestSmoke:`
`73`	`73`	`transforms.RandomHorizontalFlip(),`
`74`	`74`	`transforms.Pad(5),`
`75`	`75`	`transforms.RandomZoomOut(),`
	`76`	`+ transforms.RandomRotation(degrees=(-45, 45)),`
	`77`	`+ transforms.RandomAffine(degrees=(-45, 45)),`
`76`	`78`	`)`
`77`	`79`	`def test_common(self, transform, input):`
`78`	`80`	`transform(input)`
Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,8 @@`
`17`	`17`	`RandomVerticalFlip,`
`18`	`18`	`Pad,`
`19`	`19`	`RandomZoomOut,`
	`20`	`+ RandomRotation,`
	`21`	`+ RandomAffine,`
`20`	`22`	`)`
`21`	`23`	`from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace`
`22`	`24`	`from ._misc import Identity, Normalize, ToDtype, Lambda`