@@ -108,7 +108,7 @@ def float32_vs_uint8_pixel_difference(atol=1, mae=False):
     }


-def scripted_vs_eager_double_pixel_difference(device, atol=1e-6, rtol=1e-6):
+def scripted_vs_eager_float64_tolerances(device, atol=1e-6, rtol=1e-6):
     return {
         (("TestKernels", "test_scripted_vs_eager"), torch.float64, device): {"atol": atol, "rtol": rtol, "mae": False},
     }
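Note on the renamed helper: it returns a single tolerance entry keyed by ((test class, test name), dtype, device), so several calls can be merged with ** into one closeness_kwargs dict. A minimal sketch of that merge, using illustrative tolerance values:

    closeness_kwargs = {
        **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
        **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
    }
    # closeness_kwargs[(("TestKernels", "test_scripted_vs_eager"), torch.float64, "cpu")]
    # == {"atol": 1e-06, "rtol": 1e-06, "mae": False}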
@@ -211,10 +211,12 @@ def reference_horizontal_flip_bounding_box(bounding_box, *, format, spatial_size
             [-1, 0, spatial_size[1]],
             [0, 1, 0],
         ],
-        dtype="float32",
+        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
     )

-    expected_bboxes = reference_affine_bounding_box_helper(bounding_box, format=format, affine_matrix=affine_matrix)
+    expected_bboxes = reference_affine_bounding_box_helper(
+        bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
+    )

     return expected_bboxes

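Why the matrix dtype now follows the input dtype: a float32 entry carries roughly 1e-7 relative error, which scales with the coordinate magnitude and can exceed the tight float64 tolerances used elsewhere in this diff. A standalone numpy sketch of that effect:

    import numpy as np

    a32 = np.float32(np.cos(np.deg2rad(30.0)))  # matrix entry rounded to float32
    a64 = np.cos(np.deg2rad(30.0))              # same entry kept in float64
    x = 480.0                                   # a plausible pixel coordinate
    print(abs(a64 * x - float(a32) * x))        # several times 1e-6, above atol=1e-6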
@@ -322,7 +324,7 @@ def reference_inputs_resize_image_tensor():
 def sample_inputs_resize_bounding_box():
     for bounding_box_loader in make_bounding_box_loaders():
         for size in _get_resize_sizes(bounding_box_loader.spatial_size):
-            yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size)
+            yield ArgsKwargs(bounding_box_loader, spatial_size=bounding_box_loader.spatial_size, size=size)


 def sample_inputs_resize_mask():
@@ -344,19 +346,20 @@ def reference_resize_bounding_box(bounding_box, *, spatial_size, size, max_size=
             [new_width / old_width, 0, 0],
             [0, new_height / old_height, 0],
         ],
-        dtype="float32",
+        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
     )

     expected_bboxes = reference_affine_bounding_box_helper(
-        bounding_box, format=datapoints.BoundingBoxFormat.XYXY, affine_matrix=affine_matrix
+        bounding_box,
+        format=bounding_box.format,
+        spatial_size=(new_height, new_width),
+        affine_matrix=affine_matrix,
     )
     return expected_bboxes, (new_height, new_width)


 def reference_inputs_resize_bounding_box():
-    for bounding_box_loader in make_bounding_box_loaders(
-        formats=[datapoints.BoundingBoxFormat.XYXY], extra_dims=((), (4,))
-    ):
+    for bounding_box_loader in make_bounding_box_loaders(extra_dims=((), (4,))):
         for size in _get_resize_sizes(bounding_box_loader.spatial_size):
             yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size)

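The resize reference is a pure scale: x is multiplied by new_width / old_width and y by new_height / old_height. A worked instance with hypothetical sizes:

    import numpy as np

    box = np.array([10.0, 20.0, 30.0, 40.0])  # XYXY in a 100x100 image
    sx, sy = 200 / 100, 50 / 100              # resized to width=200, height=50
    print(box * np.array([sx, sy, sx, sy]))   # [20. 10. 60. 20.]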
@@ -543,14 +546,17 @@ def _compute_affine_matrix(angle, translate, scale, shear, center):
     return true_matrix


-def reference_affine_bounding_box_helper(bounding_box, *, format, affine_matrix):
-    def transform(bbox, affine_matrix_, format_):
+def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix):
+    def transform(bbox, affine_matrix_, format_, spatial_size_):
         # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
         in_dtype = bbox.dtype
         if not torch.is_floating_point(bbox):
             bbox = bbox.float()
         bbox_xyxy = F.convert_format_bounding_box(
-            bbox, old_format=format_, new_format=datapoints.BoundingBoxFormat.XYXY, inplace=True
+            bbox.as_subclass(torch.Tensor),
+            old_format=format_,
+            new_format=datapoints.BoundingBoxFormat.XYXY,
+            inplace=True,
         )
         points = np.array(
             [
@@ -573,12 +579,15 @@ def transform(bbox, affine_matrix_, format_):
         out_bbox = F.convert_format_bounding_box(
             out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
         )
-        return out_bbox.to(dtype=in_dtype)
+        # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
+        out_bbox = F.clamp_bounding_box(out_bbox, format=format_, spatial_size=spatial_size_)
+        out_bbox = out_bbox.to(dtype=in_dtype)
+        return out_bbox

     if bounding_box.ndim < 2:
         bounding_box = [bounding_box]

-    expected_bboxes = [transform(bbox, affine_matrix, format) for bbox in bounding_box]
+    expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_box]
     if len(expected_bboxes) > 1:
         expected_bboxes = torch.stack(expected_bboxes)
     else:
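The clamp-before-cast comment is the crux of this hunk: for integer CXCYWH boxes, truncating the .5 centers to int64 before clamping moves the box, while clamping in floating point first preserves the geometry. A standalone sketch with plain torch (not the actual F.clamp_bounding_box) on a hypothetical 10x10 canvas:

    import torch

    def xyxy_to_cxcywh(b):
        x1, y1, x2, y2 = b.unbind(-1)
        return torch.stack([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], -1)

    def cxcywh_to_xyxy(b):
        cx, cy, w, h = b.unbind(-1)
        return torch.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], -1)

    box = torch.tensor([-2.0, -2.0, 3.0, 3.0])  # XYXY, partially off-canvas

    # clamp while floating point, then cast: reflects the clamped geometry
    good = xyxy_to_cxcywh(box.clamp(0, 10)).to(torch.int64)          # [1, 1, 3, 3]

    # cast first: the .5 center truncates, and clamping afterwards shrinks the box
    bad = cxcywh_to_xyxy(xyxy_to_cxcywh(box).to(torch.int64)).clamp(0, 10)
    print(good, xyxy_to_cxcywh(bad).to(torch.int64))                 # [1, 1, 3, 3] vs [1, 1, 2, 2]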
@@ -594,7 +603,9 @@ def reference_affine_bounding_box(bounding_box, *, format, spatial_size, angle,
     affine_matrix = _compute_affine_matrix(angle, translate, scale, shear, center)
     affine_matrix = affine_matrix[:2, :]

-    expected_bboxes = reference_affine_bounding_box_helper(bounding_box, format=format, affine_matrix=affine_matrix)
+    expected_bboxes = reference_affine_bounding_box_helper(
+        bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
+    )

     return expected_bboxes

@@ -643,9 +654,6 @@ def sample_inputs_affine_video():
         sample_inputs_fn=sample_inputs_affine_bounding_box,
         reference_fn=reference_affine_bounding_box,
         reference_inputs_fn=reference_inputs_affine_bounding_box,
-        closeness_kwargs={
-            (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0),
-        },
         test_marks=[
             xfail_jit_python_scalar_arg("shear"),
         ],
@@ -729,10 +737,12 @@ def reference_vertical_flip_bounding_box(bounding_box, *, format, spatial_size):
             [1, 0, 0],
             [0, -1, spatial_size[0]],
         ],
-        dtype="float32",
+        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
     )

-    expected_bboxes = reference_affine_bounding_box_helper(bounding_box, format=format, affine_matrix=affine_matrix)
+    expected_bboxes = reference_affine_bounding_box_helper(
+        bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
+    )

     return expected_bboxes

@@ -806,6 +816,43 @@ def sample_inputs_rotate_bounding_box():
         )


+def reference_inputs_rotate_bounding_box():
+    for bounding_box_loader, angle in itertools.product(
+        make_bounding_box_loaders(extra_dims=((), (4,))), _ROTATE_ANGLES
+    ):
+        yield ArgsKwargs(
+            bounding_box_loader,
+            format=bounding_box_loader.format,
+            spatial_size=bounding_box_loader.spatial_size,
+            angle=angle,
+        )
+
+    # TODO: add samples with expand=True and center
+
+
+def reference_rotate_bounding_box(bounding_box, *, format, spatial_size, angle, expand=False, center=None):
+
+    if center is None:
+        center = [spatial_size[1] * 0.5, spatial_size[0] * 0.5]
+
+    a = np.cos(angle * np.pi / 180.0)
+    b = np.sin(angle * np.pi / 180.0)
+    cx = center[0]
+    cy = center[1]
+    affine_matrix = np.array(
+        [
+            [a, b, cx - cx * a - b * cy],
+            [-b, a, cy + cx * b - a * cy],
+        ],
+        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
+    )
+
+    expected_bboxes = reference_affine_bounding_box_helper(
+        bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
+    )
+    return expected_bboxes, spatial_size
+
+
 def sample_inputs_rotate_mask():
     for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]):
         yield ArgsKwargs(mask_loader, angle=15.0)
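The hard-coded third column of the new rotation matrix is just "rotate about center" written out: p maps to R(p - c) + c, i.e. T(center) @ R(angle) @ T(-center). A standalone numpy check of that identity, with arbitrary example values:

    import numpy as np

    angle, (cx, cy) = 30.0, (16.0, 9.0)
    a, b = np.cos(np.deg2rad(angle)), np.sin(np.deg2rad(angle))

    manual = np.array(
        [
            [a, b, cx - cx * a - b * cy],
            [-b, a, cy + cx * b - a * cy],
        ]
    )

    T = lambda tx, ty: np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1.0]])
    R = np.array([[a, b, 0], [-b, a, 0], [0, 0, 1.0]])
    assert np.allclose(manual, (T(cx, cy) @ R @ T(-cx, -cy))[:2, :])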
@@ -834,9 +881,11 @@ def sample_inputs_rotate_video():
     KernelInfo(
         F.rotate_bounding_box,
         sample_inputs_fn=sample_inputs_rotate_bounding_box,
+        reference_fn=reference_rotate_bounding_box,
+        reference_inputs_fn=reference_inputs_rotate_bounding_box,
         closeness_kwargs={
-            **scripted_vs_eager_double_pixel_difference("cpu", atol=1e-5, rtol=1e-5),
-            **scripted_vs_eager_double_pixel_difference("cuda", atol=1e-5, rtol=1e-5),
+            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
+            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
         },
     ),
     KernelInfo(
@@ -897,17 +946,19 @@ def sample_inputs_crop_video():


 def reference_crop_bounding_box(bounding_box, *, format, top, left, height, width):
-
     affine_matrix = np.array(
         [
             [1, 0, -left],
             [0, 1, -top],
         ],
-        dtype="float32",
+        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
     )

-    expected_bboxes = reference_affine_bounding_box_helper(bounding_box, format=format, affine_matrix=affine_matrix)
-    return expected_bboxes, (height, width)
+    spatial_size = (height, width)
+    expected_bboxes = reference_affine_bounding_box_helper(
+        bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
+    )
+    return expected_bboxes, spatial_size


 def reference_inputs_crop_bounding_box():
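Crop both translates by (-left, -top) and shrinks the canvas, which is why the helper now receives spatial_size=(height, width): boxes must be clamped against the cropped canvas, not the source one. A worked instance with hypothetical numbers:

    import numpy as np

    box = np.array([60.0, 5.0, 90.0, 25.0])             # XYXY in the source image
    shifted = box - np.array([20.0, 10.0, 20.0, 10.0])  # top=10, left=20
    clamped = np.clip(shifted, 0, [50, 50, 50, 50])     # crop is height=50, width=50
    print(shifted, clamped)  # [40. -5. 70. 15.] -> [40.  0. 50. 15.]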
@@ -1119,13 +1170,15 @@ def reference_pad_bounding_box(bounding_box, *, format, spatial_size, padding, p
             [1, 0, left],
             [0, 1, top],
         ],
-        dtype="float32",
+        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
     )

     height = spatial_size[0] + top + bottom
     width = spatial_size[1] + left + right

-    expected_bboxes = reference_affine_bounding_box_helper(bounding_box, format=format, affine_matrix=affine_matrix)
+    expected_bboxes = reference_affine_bounding_box_helper(
+        bounding_box, format=format, spatial_size=(height, width), affine_matrix=affine_matrix
+    )
     return expected_bboxes, (height, width)

@@ -1225,14 +1278,16 @@ def sample_inputs_perspective_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
+            spatial_size=bounding_box_loader.spatial_size,
             startpoints=None,
             endpoints=None,
             coefficients=_PERSPECTIVE_COEFFS[0],
         )

     format = datapoints.BoundingBoxFormat.XYXY
+    loader = make_bounding_box_loader(format=format)
     yield ArgsKwargs(
-        make_bounding_box_loader(format=format), format=format, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS
+        loader, format=format, spatial_size=loader.spatial_size, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS
     )

@@ -1269,13 +1324,17 @@ def sample_inputs_perspective_video():
             **pil_reference_pixel_difference(2, mae=True),
             **cuda_vs_cpu_pixel_difference(),
             **float32_vs_uint8_pixel_difference(),
-            **scripted_vs_eager_double_pixel_difference("cpu", atol=1e-5, rtol=1e-5),
-            **scripted_vs_eager_double_pixel_difference("cuda", atol=1e-5, rtol=1e-5),
+            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-5, rtol=1e-5),
+            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
         },
     ),
     KernelInfo(
         F.perspective_bounding_box,
         sample_inputs_fn=sample_inputs_perspective_bounding_box,
+        closeness_kwargs={
+            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
+            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-6, rtol=1e-6),
+        },
     ),
     KernelInfo(
         F.perspective_mask,
@@ -1292,8 +1351,8 @@ def sample_inputs_perspective_video():
         sample_inputs_fn=sample_inputs_perspective_video,
         closeness_kwargs={
             **cuda_vs_cpu_pixel_difference(),
-            **scripted_vs_eager_double_pixel_difference("cpu", atol=1e-5, rtol=1e-5),
-            **scripted_vs_eager_double_pixel_difference("cuda", atol=1e-5, rtol=1e-5),
+            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-5, rtol=1e-5),
+            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
         },
     ),
 ]
@@ -1331,6 +1390,7 @@ def sample_inputs_elastic_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
+            spatial_size=bounding_box_loader.spatial_size,
             displacement=displacement,
         )
