
Commit 71e4c56

revert image size to (width, height)
1 parent ed32288 commit 71e4c56

File tree

5 files changed (+19 −20 lines changed):

torchvision/prototype/transforms/_augment.py
torchvision/prototype/transforms/_auto_augment.py
torchvision/prototype/transforms/_geometry.py
torchvision/prototype/transforms/functional/_geometry.py
torchvision/prototype/transforms/functional/_utils.py


torchvision/prototype/transforms/_augment.py

Lines changed: 2 additions & 2 deletions
@@ -42,7 +42,7 @@ def __init__(
     def _get_params(self, sample: Any) -> Dict[str, Any]:
         image = query_image(sample)
         img_c = F.get_image_num_channels(image)
-        img_h, img_w = F.get_image_size(image)
+        img_w, img_h = F.get_image_size(image)

         if isinstance(self.value, (int, float)):
             value = [self.value]

@@ -138,7 +138,7 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
         lam = float(self._dist.sample(()))

         image = query_image(sample)
-        H, W = F.get_image_size(image)
+        W, H = F.get_image_size(image)

         r_x = torch.randint(W, ())
         r_y = torch.randint(H, ())
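The two hunks above only swap the unpacking order at the call sites. A minimal sketch of the reverted convention, using a plain-tensor stand-in for F.get_image_size (the helper body and the CHW layout assumption are mine, not part of this commit):

from typing import Tuple

import torch


def get_image_size(image: torch.Tensor) -> Tuple[int, int]:
    # Stand-in for F.get_image_size: tensor images are assumed to be
    # (..., C, H, W), but the helper reports (width, height) after the revert.
    height, width = image.shape[-2], image.shape[-1]
    return width, height


image = torch.rand(3, 480, 640)        # C=3, H=480, W=640
img_w, img_h = get_image_size(image)   # unpack width first, as in the first hunk
assert (img_w, img_h) == (640, 480)

# As in the second hunk: sample a random pixel location inside the image.
W, H = img_w, img_h
r_x = torch.randint(W, ())
r_y = torch.randint(H, ())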

torchvision/prototype/transforms/_auto_augment.py

Lines changed: 4 additions & 4 deletions
@@ -160,8 +160,8 @@ class AutoAugment(_AutoAugmentBase):
     _AUGMENTATION_SPACE = {
         "ShearX": (lambda num_bins, image_size: torch.linspace(0.0, 0.3, num_bins), True),
         "ShearY": (lambda num_bins, image_size: torch.linspace(0.0, 0.3, num_bins), True),
-        "TranslateX": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[1], num_bins), True),
-        "TranslateY": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[0], num_bins), True),
+        "TranslateX": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[0], num_bins), True),
+        "TranslateY": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[1], num_bins), True),
         "Rotate": (lambda num_bins, image_size: torch.linspace(0.0, 30.0, num_bins), True),
         "Brightness": (lambda num_bins, image_size: torch.linspace(0.0, 0.9, num_bins), True),
         "Color": (lambda num_bins, image_size: torch.linspace(0.0, 0.9, num_bins), True),

@@ -306,8 +306,8 @@ class RandAugment(_AutoAugmentBase):
         "Identity": (lambda num_bins, image_size: None, False),
         "ShearX": (lambda num_bins, image_size: torch.linspace(0.0, 0.3, num_bins), True),
         "ShearY": (lambda num_bins, image_size: torch.linspace(0.0, 0.3, num_bins), True),
-        "TranslateX": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[1], num_bins), True),
-        "TranslateY": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[0], num_bins), True),
+        "TranslateX": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[0], num_bins), True),
+        "TranslateY": (lambda num_bins, image_size: torch.linspace(0.0, 150.0 / 331.0 * image_size[1], num_bins), True),
         "Rotate": (lambda num_bins, image_size: torch.linspace(0.0, 30.0, num_bins), True),
         "Brightness": (lambda num_bins, image_size: torch.linspace(0.0, 0.9, num_bins), True),
         "Color": (lambda num_bins, image_size: torch.linspace(0.0, 0.9, num_bins), True),

torchvision/prototype/transforms/_geometry.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def __init__(

     def _get_params(self, sample: Any) -> Dict[str, Any]:
         image = query_image(sample)
-        height, width = F.get_image_size(image)
+        width, height = F.get_image_size(image)
         area = height * width

         log_ratio = torch.log(torch.tensor(self.ratio))
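Since area = height * width is symmetric, the unpacking order in this hunk only matters for the code further down that uses width and height separately. A sketch of the surrounding RandomResizedCrop-style parameter sampling, assuming F.get_image_size returns (width, height); the scale and ratio values are illustrative, not taken from the transform's defaults:

import torch

width, height = 640, 480                  # order returned by F.get_image_size(image)
area = height * width

scale = (0.08, 1.0)                        # illustrative values
ratio = (3.0 / 4.0, 4.0 / 3.0)
log_ratio = torch.log(torch.tensor(ratio))

target_area = area * torch.empty(1).uniform_(*scale).item()
aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0].item(), log_ratio[1].item())).item()

# Candidate crop size; the real transform retries until the crop fits inside the image.
w = int(round((target_area * aspect_ratio) ** 0.5))
h = int(round((target_area / aspect_ratio) ** 0.5))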

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 8 additions & 8 deletions
@@ -40,7 +40,7 @@ def resize_image_tensor(
     antialias: Optional[bool] = None,
 ) -> torch.Tensor:
     new_height, new_width = size
-    old_height, old_width = _FT.get_image_size(image)
+    old_width, old_height = _FT.get_image_size(image)
     num_channels = _FT.get_image_num_channels(image)
     batch_shape = image.shape[:-3]
     return _FT.resize(

@@ -143,7 +143,7 @@ def affine_image_tensor(

     center_f = [0.0, 0.0]
     if center is not None:
-        height, width = get_image_size(img)
+        width, height = get_image_size(img)
         # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
         center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, (width, height))]

@@ -169,7 +169,7 @@ def affine_image_pil(
     # it is visually better to estimate the center without 0.5 offset
     # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine
     if center is None:
-        height, width = get_image_size(img)
+        width, height = get_image_size(img)
         center = [width * 0.5, height * 0.5]
     matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)

@@ -186,7 +186,7 @@ def rotate_image_tensor(
 ) -> torch.Tensor:
     center_f = [0.0, 0.0]
     if center is not None:
-        height, width = get_image_size(img)
+        width, height = get_image_size(img)
         # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
         center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, (width, height))]

@@ -262,13 +262,13 @@ def _center_crop_compute_crop_anchor(

 def center_crop_image_tensor(img: torch.Tensor, output_size: List[int]) -> torch.Tensor:
     crop_height, crop_width = _center_crop_parse_output_size(output_size)
-    image_height, image_width = get_image_size(img)
+    image_width, image_height = get_image_size(img)

     if crop_height > image_height or crop_width > image_width:
         padding_ltrb = _center_crop_compute_padding(crop_height, crop_width, image_height, image_width)
         img = pad_image_tensor(img, padding_ltrb, fill=0)

-    image_height, image_width = get_image_size(img)
+    image_width, image_height = get_image_size(img)
     if crop_width == image_width and crop_height == image_height:
         return img

@@ -278,13 +278,13 @@ def center_crop_image_tensor(img: torch.Tensor, output_size: List[int]) -> torch.Tensor:

 def center_crop_image_pil(img: PIL.Image.Image, output_size: List[int]) -> PIL.Image.Image:
     crop_height, crop_width = _center_crop_parse_output_size(output_size)
-    image_height, image_width = get_image_size(img)
+    image_width, image_height = get_image_size(img)

     if crop_height > image_height or crop_width > image_width:
         padding_ltrb = _center_crop_compute_padding(crop_height, crop_width, image_height, image_width)
         img = pad_image_pil(img, padding_ltrb, fill=0)

-    image_height, image_width = get_image_size(img)
+    image_width, image_height = get_image_size(img)
     if crop_width == image_width and crop_height == image_height:
         return img
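These hunks all feed two patterns: shifting the affine/rotation center so (0, 0) is the image center, and deciding whether a center crop needs padding first, both now starting from (width, height). A compact sketch of both with made-up sizes; center_offsets is a hypothetical helper, not a function from this diff:

from typing import List, Sequence, Tuple

import torch


def center_offsets(center: Sequence[float], image_size_wh: Tuple[int, int]) -> List[float]:
    # Shift pixel coordinates so that (0, 0) corresponds to the image center,
    # as in the affine/rotate hunks above.
    width, height = image_size_wh
    return [1.0 * (c - s * 0.5) for c, s in zip(center, (width, height))]


img = torch.rand(3, 480, 640)                      # (C, H, W)
image_width, image_height = img.shape[-1], img.shape[-2]

# Rotating around the geometric center yields a zero offset.
assert center_offsets([image_width * 0.5, image_height * 0.5], (image_width, image_height)) == [0.0, 0.0]

# Center crop: pad first only if the requested crop is larger than the image.
crop_height, crop_width = 512, 512
needs_padding = crop_height > image_height or crop_width > image_width
assert needs_padding  # 512 > 480 in this example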

torchvision/prototype/transforms/functional/_utils.py

Lines changed: 4 additions & 5 deletions
@@ -8,13 +8,12 @@

 def get_image_size(image: Union[PIL.Image.Image, torch.Tensor, features.Image]) -> Tuple[int, int]:
     if isinstance(image, features.Image):
-        return image.image_size
+        height, width = image.image_size
+        return width, height
     elif isinstance(image, torch.Tensor):
-        width, height = _FT.get_image_size(image)
-        return height, width
+        return cast(Tuple[int, int], tuple(_FT.get_image_size(image)))
     if isinstance(image, PIL.Image.Image):
-        width, height = _FP.get_image_size(image)
-        return height, width
+        return cast(Tuple[int, int], tuple(_FP.get_image_size(image)))
     else:
         raise TypeError(f"unable to get image size from object of type {type(image).__name__}")

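After the revert, get_image_size reports (width, height) for every input kind: the Tensor and PIL branches forward the underlying kernel results unchanged, while features.Image.image_size, which stores (height, width), still needs swapping. A self-contained stand-in that mimics the dispatch for tensor and PIL inputs (the helper body below is my sketch, not the dispatcher from this diff):

from typing import Tuple

import torch


def get_image_size(image) -> Tuple[int, int]:
    # Stand-in for the dispatcher above: always report (width, height).
    if isinstance(image, torch.Tensor):
        height, width = image.shape[-2], image.shape[-1]   # tensor layout is (..., C, H, W)
        return width, height
    try:
        import PIL.Image
        if isinstance(image, PIL.Image.Image):
            return image.size                              # PIL already stores (width, height)
    except ImportError:
        pass
    raise TypeError(f"unable to get image size from object of type {type(image).__name__}")


assert get_image_size(torch.rand(3, 480, 640)) == (640, 480)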
