From ca7d8220192d30a9bd7e5302887e8eb30c89bf0d Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Tue, 17 May 2022 13:44:31 +0200
Subject: [PATCH 1/3] add tests for F.pad_bounding_box

---
 test/test_prototype_transforms_functional.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index b24e9a41ff7..f1696b392b4 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -382,6 +382,15 @@ def pad_segmentation_mask():
         yield SampleInput(mask, padding=padding, padding_mode=padding_mode)
 
 
+@register_kernel_info_from_sample_inputs_fn
+def pad_bounding_box():
+    for bounding_box, padding in itertools.product(
+        make_bounding_boxes(),
+        [[1], [1, 1], [1, 1, 2, 2]],
+    ):
+        yield SampleInput(bounding_box, padding=padding, format=bounding_box.format)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def perspective_bounding_box():
     for bounding_box, perspective_coeffs in itertools.product(

From 5945b41d1cd39ebcc6ebb5efa62b99e1deb50d92 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 26 May 2022 09:13:15 +0000
Subject: [PATCH 2/3] Added correctness tests for pad and reimplemented bbox op to keep dtype

---
 test/test_prototype_transforms_functional.py | 73 +++++++++++++++----
 .../transforms/functional/_geometry.py       | 18 ++--
 2 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index fb0a30827af..b2bd38848b4 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -1112,22 +1112,67 @@ def test_correctness_pad_segmentation_mask_on_fixed_input(device):
     torch.testing.assert_close(out_mask, expected_mask)
 
 
+def _parse_padding(padding):
+    if isinstance(padding, int):
+        return [padding] * 4
+    if isinstance(padding, list):
+        if len(padding) == 1:
+            return padding * 4
+        if len(padding) == 2:
+            return padding * 2  # [left, up, right, down]
+
+    return padding
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("padding", [[1], [1, 1], [1, 1, 2, 2]])
+def test_correctness_pad_bounding_box(device, padding):
+    def _compute_expected_bbox(bbox, padding_):
+        pad_left, pad_up, _, _ = _parse_padding(padding_)
+
+        bbox_format = bbox.format
+        bbox_dtype = bbox.dtype
+        bbox = convert_bounding_box_format(bbox, old_format=bbox_format, new_format=features.BoundingBoxFormat.XYXY)
+
+        bbox[0::2] += pad_left
+        bbox[1::2] += pad_up
+
+        bbox = convert_bounding_box_format(
+            bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox_format, copy=False
+        )
+        if bbox.dtype != bbox_dtype:
+            # Temporary cast to original dtype
+            # e.g. float32 -> int
+            bbox = bbox.to(bbox_dtype)
+        return bbox
+
+    for bboxes in make_bounding_boxes():
+        bboxes = bboxes.to(device)
+        bboxes_format = bboxes.format
+        bboxes_image_size = bboxes.image_size
+
+        output_boxes = F.pad_bounding_box(bboxes, padding, format=bboxes_format)
+
+        if bboxes.ndim < 2:
+            bboxes = [bboxes]
+
+        expected_bboxes = []
+        for bbox in bboxes:
+            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
+            expected_bboxes.append(_compute_expected_bbox(bbox, padding))
+
+        if len(expected_bboxes) > 1:
+            expected_bboxes = torch.stack(expected_bboxes)
+        else:
+            expected_bboxes = expected_bboxes[0]
+        torch.testing.assert_close(output_boxes, expected_bboxes)
+
+
 @pytest.mark.parametrize("padding", [[1, 2, 3, 4], [1], 1, [1, 2]])
 def test_correctness_pad_segmentation_mask(padding):
-    def _compute_expected_mask():
-        def parse_padding():
-            if isinstance(padding, int):
-                return [padding] * 4
-            if isinstance(padding, list):
-                if len(padding) == 1:
-                    return padding * 4
-                if len(padding) == 2:
-                    return padding * 2  # [left, up, right, down]
-
-            return padding
-
+    def _compute_expected_mask(mask, padding_):
         h, w = mask.shape[-2], mask.shape[-1]
-        pad_left, pad_up, pad_right, pad_down = parse_padding()
+        pad_left, pad_up, pad_right, pad_down = _parse_padding(padding_)
 
         new_h = h + pad_up + pad_down
         new_w = w + pad_left + pad_right
@@ -1141,7 +1186,7 @@ def parse_padding():
 
     for mask in make_segmentation_masks():
         out_mask = F.pad_segmentation_mask(mask, padding, "constant")
-        expected_mask = _compute_expected_mask()
+        expected_mask = _compute_expected_mask(mask, padding)
 
         torch.testing.assert_close(out_mask, expected_mask)
 
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 00c8a59e395..dfcdce033cf 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -415,16 +415,14 @@ def pad_bounding_box(
 ) -> torch.Tensor:
     left, _, top, _ = _FT._parse_pad_padding(padding)
 
-    bounding_box = convert_bounding_box_format(
-        bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
-    )
-
-    bounding_box[..., 0::2] += left
-    bounding_box[..., 1::2] += top
-
-    return convert_bounding_box_format(
-        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
-    )
+    bounding_box = bounding_box.clone()
+
+    bounding_box[..., 0] += left
+    bounding_box[..., 1] += top
+    if format == features.BoundingBoxFormat.XYXY:
+        bounding_box[..., 2] += left
+        bounding_box[..., 3] += top
+    return bounding_box
 
 
 crop_image_tensor = _FT.crop

From a7fd05cb61b6496cea7a2e6eddc33bc1f2559bd3 Mon Sep 17 00:00:00 2001
From: vfdev
Date: Thu, 26 May 2022 11:47:38 +0200
Subject: [PATCH 3/3] Update _geometry.py

---
 torchvision/prototype/transforms/functional/_geometry.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index dfcdce033cf..ac0e8e0eb13 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -417,6 +417,7 @@ def pad_bounding_box(
 
     bounding_box = bounding_box.clone()
 
+    # this works without conversion since padding only affects xy coordinates
    bounding_box[..., 0] += left
     bounding_box[..., 1] += top
     if format == features.BoundingBoxFormat.XYXY:
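
Note (illustration only, not part of the patch series): the reimplemented pad_bounding_box above shifts box coordinates by the left/top padding only, since right/bottom padding does not move the image origin, and by working on a clone instead of converting formats it keeps the input dtype. A minimal standalone sketch of that arithmetic for XYXY boxes follows; the helper name pad_xyxy_boxes and the sample tensor are made up for this example.

import torch


def pad_xyxy_boxes(boxes: torch.Tensor, padding: list) -> torch.Tensor:
    # padding follows the torchvision convention [left, top, right, bottom];
    # only the left/top components move box coordinates.
    left, top = padding[0], padding[1]
    boxes = boxes.clone()  # clone so the caller's tensor (and its dtype) stays untouched
    boxes[..., 0::2] += left  # x1, x2
    boxes[..., 1::2] += top   # y1, y2
    return boxes


boxes = torch.tensor([[10, 20, 30, 40]], dtype=torch.int64)
print(pad_xyxy_boxes(boxes, [1, 2, 3, 4]))  # tensor([[11, 22, 31, 42]])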