from torch.nn.functional import one_hot
from torchvision.prototype import features
from torchvision.prototype.transforms.functional._meta import convert_bounding_box_format
+from torchvision.transforms.functional import _get_perspective_coeffs
from torchvision.transforms.functional_tensor import _max_value as get_max_value

make_tensor = functools.partial(torch.testing.make_tensor, device="cpu")
@@ -380,6 +381,37 @@ def pad_segmentation_mask():
        yield SampleInput(mask, padding=padding, padding_mode=padding_mode)


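+# The eight perspective coefficients (a, b, c, d, e, f, g, h) follow the
+# _get_perspective_coeffs convention: a point (x, y) maps to
+# ((a*x + b*y + c) / (g*x + h*y + 1), (d*x + e*y + f) / (g*x + h*y + 1)).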
+@register_kernel_info_from_sample_inputs_fn
+def perspective_bounding_box():
+    for bounding_box, perspective_coeffs in itertools.product(
+        make_bounding_boxes(),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
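+# Same coefficient sets as above, exercised against masks with and without
+# extra leading dimensions.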
+@register_kernel_info_from_sample_inputs_fn
+def perspective_segmentation_mask():
+    for mask, perspective_coeffs in itertools.product(
+        make_segmentation_masks(extra_dims=((), (4,))),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            mask,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
@register_kernel_info_from_sample_inputs_fn
def center_crop_bounding_box():
    for bounding_box, output_size in itertools.product(make_bounding_boxes(), [(24, 12), [16, 18], [46, 48], [12]]):
@@ -993,7 +1025,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
    ],
)
def test_correctness_resized_crop_bounding_box(device, format, top, left, height, width, size):
-    def _compute_expected(bbox, top_, left_, height_, width_, size_):
+    def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
        # bbox should be xyxy
        bbox[0] = (bbox[0] - left_) * size_[1] / width_
        bbox[1] = (bbox[1] - top_) * size_[0] / height_
@@ -1009,7 +1041,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
    ]
    expected_bboxes = []
    for in_box in in_boxes:
-        expected_bboxes.append(_compute_expected(list(in_box), top, left, height, width, size))
+        expected_bboxes.append(_compute_expected_bbox(list(in_box), top, left, height, width, size))
    expected_bboxes = torch.tensor(expected_bboxes, device=device)

    in_boxes = features.BoundingBox(
@@ -1035,7 +1067,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
    ],
)
def test_correctness_resized_crop_segmentation_mask(device, top, left, height, width, size):
-    def _compute_expected(mask, top_, left_, height_, width_, size_):
+    def _compute_expected_mask(mask, top_, left_, height_, width_, size_):
        output = mask.clone()
        output = output[:, top_ : top_ + height_, left_ : left_ + width_]
        output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest")
@@ -1046,7 +1078,7 @@ def _compute_expected(mask, top_, left_, height_, width_, size_):
    in_mask[0, 10:20, 10:20] = 1
    in_mask[0, 5:15, 12:23] = 2

-    expected_mask = _compute_expected(in_mask, top, left, height, width, size)
+    expected_mask = _compute_expected_mask(in_mask, top, left, height, width, size)
    output_mask = F.resized_crop_segmentation_mask(in_mask, top, left, height, width, size)
    torch.testing.assert_close(output_mask, expected_mask)

@@ -1095,6 +1127,161 @@ def parse_padding():
    torch.testing.assert_close(out_mask, expected_mask)


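+# The perspective tests below check the prototype kernels against a NumPy
+# reference that applies the projective mapping point by point.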
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
+    def _compute_expected_bbox(bbox, pcoeffs_):
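+        # m1 holds the numerator rows of the projective map (one per output
+        # coordinate); m2 repeats the shared denominator g*x + h*y + 1 so that
+        # numer / denom broadcasts over both coordinates.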
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
+        bbox_xyxy = convert_bounding_box_format(
+            bbox, old_format=bbox.format, new_format=features.BoundingBoxFormat.XYXY
+        )
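+        # The four box corners in homogeneous coordinates; taking min/max of the
+        # transformed corners re-axis-aligns the resulting box.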
+        points = np.array(
+            [
+                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        numer = np.matmul(points, m1.T)
+        denom = np.matmul(points, m2.T)
+        transformed_points = numer / denom
+        out_bbox = [
+            np.min(transformed_points[:, 0]),
+            np.min(transformed_points[:, 1]),
+            np.max(transformed_points[:, 0]),
+            np.max(transformed_points[:, 1]),
+        ]
+        out_bbox = features.BoundingBox(
+            out_bbox,
+            format=features.BoundingBoxFormat.XYXY,
+            image_size=bbox.image_size,
+            dtype=torch.float32,
+            device=bbox.device,
+        )
+        return convert_bounding_box_format(
+            out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
+        )
+
+    image_size = (32, 38)
+
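+    # The kernel gets the forward coefficients, while the expected boxes are
+    # built from the inverse ones: like the image kernel, perspective_bounding_box
+    # interprets its coefficients as the output-to-input mapping, so input
+    # points move by the inverse transform.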
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+    inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)
+
+    for bboxes in make_bounding_boxes(
+        image_sizes=[
+            image_size,
+        ],
+        extra_dims=((4,),),
+    ):
+        bboxes = bboxes.to(device)
+        bboxes_format = bboxes.format
+        bboxes_image_size = bboxes.image_size
+
+        output_bboxes = F.perspective_bounding_box(
+            bboxes,
+            bboxes_format,
+            perspective_coeffs=pcoeffs,
+        )
+
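+        # A single box comes back without a batch dimension; wrap it so the
+        # comparison below can iterate uniformly.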
+        if bboxes.ndim < 2:
+            bboxes = [bboxes]
+
+        expected_bboxes = []
+        for bbox in bboxes:
+            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
+            expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs))
+        if len(expected_bboxes) > 1:
+            expected_bboxes = torch.stack(expected_bboxes)
+        else:
+            expected_bboxes = expected_bboxes[0]
+        torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=1e-5, atol=1e-5)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_segmentation_mask(device, startpoints, endpoints):
+    def _compute_expected_mask(mask, pcoeffs_):
+        assert mask.ndim == 3 and mask.shape[0] == 1
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
+        expected_mask = torch.zeros_like(mask.cpu())
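+        # Backward-map each output pixel center through the perspective transform
+        # and copy the nearest in-bounds input pixel; out-of-range pixels stay 0.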
+        for out_y in range(expected_mask.shape[1]):
+            for out_x in range(expected_mask.shape[2]):
+                output_pt = np.array([out_x + 0.5, out_y + 0.5, 1.0])
+
+                numer = np.matmul(output_pt, m1.T)
+                denom = np.matmul(output_pt, m2.T)
+                input_pt = np.floor(numer / denom).astype(np.int32)
+
+                in_x, in_y = input_pt[:2]
+                if 0 <= in_x < mask.shape[2] and 0 <= in_y < mask.shape[1]:
+                    expected_mask[0, out_y, out_x] = mask[0, in_y, in_x]
+        return expected_mask.to(mask.device)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+
+    for mask in make_segmentation_masks(extra_dims=((), (4,))):
+        mask = mask.to(device)
+
+        output_mask = F.perspective_segmentation_mask(
+            mask,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if mask.ndim < 4:
+            masks = [mask]
+        else:
+            masks = [m for m in mask]
+
+        expected_masks = []
+        for mask in masks:
+            expected_mask = _compute_expected_mask(mask, pcoeffs)
+            expected_masks.append(expected_mask)
+        if len(expected_masks) > 1:
+            expected_masks = torch.stack(expected_masks)
+        else:
+            expected_masks = expected_masks[0]
+        torch.testing.assert_close(output_mask, expected_masks)
+
+
@pytest.mark.parametrize("device", cpu_and_gpu())
@pytest.mark.parametrize(
    "output_size",
@@ -1148,5 +1335,4 @@ def _compute_expected_bbox(bbox, output_size_):
        expected_bboxes = torch.stack(expected_bboxes)
    else:
        expected_bboxes = expected_bboxes[0]
-    expected_bboxes = expected_bboxes.to(device=device)
    torch.testing.assert_close(output_boxes, expected_bboxes)