from torch.nn.functional import one_hot
from torchvision.prototype import features
from torchvision.prototype.transforms.functional._meta import convert_bounding_box_format
+from torchvision.transforms.functional import _get_perspective_coeffs
from torchvision.transforms.functional_tensor import _max_value as get_max_value

+
make_tensor = functools.partial(torch.testing.make_tensor, device="cpu")


@@ -380,6 +382,37 @@ def pad_segmentation_mask():
        yield SampleInput(mask, padding=padding, padding_mode=padding_mode)


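+# The hard-coded lists below are the eight perspective coefficients
+# (a, b, c, d, e, f, g, h) as returned by _get_perspective_coeffs, i.e. the mapping
+# (x, y) -> ((a*x + b*y + c) / (g*x + h*y + 1), (d*x + e*y + f) / (g*x + h*y + 1)).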
+@register_kernel_info_from_sample_inputs_fn
+def perspective_bounding_box():
+    for bounding_box, perspective_coeffs in itertools.product(
+        make_bounding_boxes(),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
+@register_kernel_info_from_sample_inputs_fn
+def perspective_segmentation_mask():
+    for mask, perspective_coeffs in itertools.product(
+        make_segmentation_masks(extra_dims=((), (4,))),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            mask,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
@register_kernel_info_from_sample_inputs_fn
def center_crop_bounding_box():
    for bounding_box, output_size in itertools.product(make_bounding_boxes(), [(24, 12), [16, 18], [46, 48], [12]]):
@@ -993,7 +1026,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
    ],
)
def test_correctness_resized_crop_bounding_box(device, format, top, left, height, width, size):
-    def _compute_expected(bbox, top_, left_, height_, width_, size_):
+    def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
        # bbox should be xyxy
        bbox[0] = (bbox[0] - left_) * size_[1] / width_
        bbox[1] = (bbox[1] - top_) * size_[0] / height_
@@ -1009,7 +1042,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
    ]
    expected_bboxes = []
    for in_box in in_boxes:
-        expected_bboxes.append(_compute_expected(list(in_box), top, left, height, width, size))
+        expected_bboxes.append(_compute_expected_bbox(list(in_box), top, left, height, width, size))
    expected_bboxes = torch.tensor(expected_bboxes, device=device)

    in_boxes = features.BoundingBox(
@@ -1035,7 +1068,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
    ],
)
def test_correctness_resized_crop_segmentation_mask(device, top, left, height, width, size):
-    def _compute_expected(mask, top_, left_, height_, width_, size_):
+    def _compute_expected_mask(mask, top_, left_, height_, width_, size_):
        output = mask.clone()
        output = output[:, top_ : top_ + height_, left_ : left_ + width_]
        output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest")
@@ -1046,7 +1079,7 @@ def _compute_expected(mask, top_, left_, height_, width_, size_):
    in_mask[0, 10:20, 10:20] = 1
    in_mask[0, 5:15, 12:23] = 2

-    expected_mask = _compute_expected(in_mask, top, left, height, width, size)
+    expected_mask = _compute_expected_mask(in_mask, top, left, height, width, size)
    output_mask = F.resized_crop_segmentation_mask(in_mask, top, left, height, width, size)
    torch.testing.assert_close(output_mask, expected_mask)

@@ -1095,6 +1128,161 @@ def parse_padding():
    torch.testing.assert_close(out_mask, expected_mask)


+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
+    def _compute_expected_bbox(bbox, pcoeffs_):
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
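+        # m1 holds the two affine numerator rows and m2 the shared projective
+        # denominator, so a homogeneous point p maps to (p @ m1.T) / (p @ m2.T).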
+        bbox_xyxy = convert_bounding_box_format(
+            bbox, old_format=bbox.format, new_format=features.BoundingBoxFormat.XYXY
+        )
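+        # A perspective transform does not keep boxes axis-aligned, so transform
+        # all four corners and take their axis-aligned envelope below.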
+        points = np.array(
+            [
+                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        numer = np.matmul(points, m1.T)
+        denom = np.matmul(points, m2.T)
+        transformed_points = numer / denom
+        out_bbox = [
+            np.min(transformed_points[:, 0]),
+            np.min(transformed_points[:, 1]),
+            np.max(transformed_points[:, 0]),
+            np.max(transformed_points[:, 1]),
+        ]
+        out_bbox = features.BoundingBox(
+            out_bbox,
+            format=features.BoundingBoxFormat.XYXY,
+            image_size=bbox.image_size,
+            dtype=torch.float32,
+            device=bbox.device,
+        )
+        return convert_bounding_box_format(
+            out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
+        )
+
+    image_size = (32, 38)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+    inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)
+
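+    # The kernel receives pcoeffs, but the expected boxes are built with
+    # inv_pcoeffs: the coefficients describe the output -> input sampling grid
+    # (as for images), so point coordinates move under the inverse mapping.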
+    for bboxes in make_bounding_boxes(
+        image_sizes=[
+            image_size,
+        ],
+        extra_dims=((4,),),
+    ):
+        bboxes = bboxes.to(device)
+        bboxes_format = bboxes.format
+        bboxes_image_size = bboxes.image_size
+
+        output_bboxes = F.perspective_bounding_box(
+            bboxes,
+            bboxes_format,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if bboxes.ndim < 2:
+            bboxes = [bboxes]
+
+        expected_bboxes = []
+        for bbox in bboxes:
+            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
+            expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs))
+        if len(expected_bboxes) > 1:
+            expected_bboxes = torch.stack(expected_bboxes)
+        else:
+            expected_bboxes = expected_bboxes[0]
+        torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=1e-5, atol=1e-5)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_segmentation_mask(device, startpoints, endpoints):
+    def _compute_expected_mask(mask, pcoeffs_):
+        assert mask.ndim == 3 and mask.shape[0] == 1
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
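+        # Reference inverse warp: map each output pixel center through the
+        # coefficients to its source location and copy that pixel when it falls
+        # inside the input, mimicking nearest interpolation for masks.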
+        expected_mask = torch.zeros_like(mask.cpu())
+        for out_y in range(expected_mask.shape[1]):
+            for out_x in range(expected_mask.shape[2]):
+                output_pt = np.array([out_x + 0.5, out_y + 0.5, 1.0])
+
+                numer = np.matmul(output_pt, m1.T)
+                denom = np.matmul(output_pt, m2.T)
+                input_pt = np.floor(numer / denom).astype(np.int32)
+
+                in_x, in_y = input_pt[:2]
+                if 0 <= in_x < mask.shape[2] and 0 <= in_y < mask.shape[1]:
+                    expected_mask[0, out_y, out_x] = mask[0, in_y, in_x]
+        return expected_mask.to(mask.device)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+
+    for mask in make_segmentation_masks(extra_dims=((), (4,))):
+        mask = mask.to(device)
+
+        output_mask = F.perspective_segmentation_mask(
+            mask,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if mask.ndim < 4:
+            masks = [mask]
+        else:
+            masks = [m for m in mask]
+
+        expected_masks = []
+        for mask in masks:
+            expected_mask = _compute_expected_mask(mask, pcoeffs)
+            expected_masks.append(expected_mask)
+        if len(expected_masks) > 1:
+            expected_masks = torch.stack(expected_masks)
+        else:
+            expected_masks = expected_masks[0]
+        torch.testing.assert_close(output_mask, expected_masks)
+
+
@pytest.mark.parametrize("device", cpu_and_gpu())
@pytest.mark.parametrize(
    "output_size",
@@ -1148,5 +1336,4 @@ def _compute_expected_bbox(bbox, output_size_):
        expected_bboxes = torch.stack(expected_bboxes)
    else:
        expected_bboxes = expected_bboxes[0]
-    expected_bboxes = expected_bboxes.to(device=device)
    torch.testing.assert_close(output_boxes, expected_bboxes)