@@ -492,38 +492,39 @@ def hflip(img: Tensor) -> Tensor:
492
492
493
493
494
494
def _get_perspective_coeffs (
495
- startpoints : List [Tuple [int , int ]], endpoints : List [Tuple [ int , int ]]
495
+ startpoints : List [List [int ]], endpoints : List [List [ int ]]
496
496
) -> List [float ]:
497
497
"""Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.
498
498
499
499
In Perspective Transform each pixel (x, y) in the original image gets transformed as,
500
500
(x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )
501
501
502
502
Args:
503
- startpoints (list of tuples ): List containing four tuples of two integers corresponding to four corners
503
+ startpoints (list of list of ints ): List containing four lists of two integers corresponding to four corners
504
504
``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
505
- endpoints (list of tuples ): List containing four tuples of two integers corresponding to four corners
505
+ endpoints (list of list of ints ): List containing four lists of two integers corresponding to four corners
506
506
``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
507
507
508
508
Returns:
509
509
octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
510
510
"""
511
- matrix = []
511
+ a_matrix = torch . zeros ( 2 * len ( startpoints ), 8 , dtype = torch . float )
512
512
513
- for p1 , p2 in zip (endpoints , startpoints ):
514
- matrix . append ([p1 [0 ], p1 [1 ], 1 , 0 , 0 , 0 , - p2 [0 ] * p1 [0 ], - p2 [0 ] * p1 [1 ]])
515
- matrix . append ([0 , 0 , 0 , p1 [0 ], p1 [1 ], 1 , - p2 [1 ] * p1 [0 ], - p2 [1 ] * p1 [1 ]])
513
+ for i , ( p1 , p2 ) in enumerate ( zip (endpoints , startpoints ) ):
514
+ a_matrix [ 2 * i , :] = torch . tensor ([p1 [0 ], p1 [1 ], 1 , 0 , 0 , 0 , - p2 [0 ] * p1 [0 ], - p2 [0 ] * p1 [1 ]])
515
+ a_matrix [ 2 * i + 1 , :] = torch . tensor ([0 , 0 , 0 , p1 [0 ], p1 [1 ], 1 , - p2 [1 ] * p1 [0 ], - p2 [1 ] * p1 [1 ]])
516
516
517
- A = torch .tensor (matrix , dtype = torch .float )
518
- B = torch .tensor (startpoints , dtype = torch .float ).view (8 )
519
- res = torch .lstsq (B , A )[0 ]
520
- return res .squeeze_ (1 ).tolist ()
517
+ b_matrix = torch .tensor (startpoints , dtype = torch .float ).view (8 )
518
+ res = torch .lstsq (b_matrix , a_matrix )[0 ]
519
+ # We have to explicitly produce the list of floats, otherwise torch.jit.script does recognize output type
520
+ # RuntimeError: Expected type hint for result of tolist()
521
+ return [float (i .item ()) for i in res [:, 0 ]]
521
522
522
523
523
524
def perspective (
524
525
img : Tensor ,
525
- startpoints : List [Tuple [ int , int ]],
526
- endpoints : List [Tuple [ int , int ]],
526
+ startpoints : List [List [ int ]],
527
+ endpoints : List [List [ int ]],
527
528
interpolation : int = 3 ,
528
529
fill : Optional [int ] = None
529
530
) -> Tensor :
@@ -533,9 +534,9 @@ def perspective(
533
534
534
535
Args:
535
536
img (PIL Image or Tensor): Image to be transformed.
536
- startpoints (list of tuples ): List containing four tuples of two integers corresponding to four corners
537
+ startpoints (list of list of ints ): List containing four lists of two integers corresponding to four corners
537
538
``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
538
- endpoints (list of tuples ): List containing four tuples of two integers corresponding to four corners
539
+ endpoints (list of list of ints ): List containing four lists of two integers corresponding to four corners
539
540
``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
540
541
interpolation (int): Interpolation type. If input is Tensor, only ``PIL.Image.NEAREST`` and
541
542
``PIL.Image.BILINEAR`` are supported. Default, ``PIL.Image.BICUBIC`` for PIL images and
@@ -546,15 +547,20 @@ def perspective(
546
547
input. Fill value for the area outside the transform in the output image is always 0.
547
548
548
549
Returns:
549
- PIL Image or Tensor: Perspectively transformed Image.
550
+ PIL Image or Tensor: transformed Image.
550
551
"""
551
552
552
553
coeffs = _get_perspective_coeffs (startpoints , endpoints )
553
554
554
555
if not isinstance (img , torch .Tensor ):
555
556
return F_pil .perspective (img , coeffs , interpolation = interpolation , fill = fill )
556
557
557
- return F_t .perspective ()
558
+ if interpolation == Image .BICUBIC :
559
+ # bicubic is not supported by pytorch
560
+ # set to bilinear interpolation
561
+ interpolation = 2
562
+
563
+ return F_t .perspective (img , coeffs , interpolation = interpolation , fill = fill )
558
564
559
565
560
566
def vflip (img : Tensor ) -> Tensor :
0 commit comments