@@ -491,53 +491,70 @@ def hflip(img: Tensor) -> Tensor:
491
491
return F_t .hflip (img )
492
492
493
493
494
- def _get_perspective_coeffs (startpoints , endpoints ):
494
+ def _get_perspective_coeffs (
495
+ startpoints : List [List [int ]], endpoints : List [List [int ]]
496
+ ) -> List [float ]:
495
497
"""Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.
496
498
497
499
In Perspective Transform each pixel (x, y) in the original image gets transformed as,
498
500
(x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )
499
501
500
502
Args:
501
- List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
502
- List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image
503
+ startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
504
+ ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
505
+ endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
506
+ ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
507
+
503
508
Returns:
504
509
octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
505
510
"""
506
- matrix = []
511
+ a_matrix = torch .zeros (2 * len (startpoints ), 8 , dtype = torch .float )
512
+
513
+ for i , (p1 , p2 ) in enumerate (zip (endpoints , startpoints )):
514
+ a_matrix [2 * i , :] = torch .tensor ([p1 [0 ], p1 [1 ], 1 , 0 , 0 , 0 , - p2 [0 ] * p1 [0 ], - p2 [0 ] * p1 [1 ]])
515
+ a_matrix [2 * i + 1 , :] = torch .tensor ([0 , 0 , 0 , p1 [0 ], p1 [1 ], 1 , - p2 [1 ] * p1 [0 ], - p2 [1 ] * p1 [1 ]])
507
516
508
- for p1 , p2 in zip (endpoints , startpoints ):
509
- matrix .append ([p1 [0 ], p1 [1 ], 1 , 0 , 0 , 0 , - p2 [0 ] * p1 [0 ], - p2 [0 ] * p1 [1 ]])
510
- matrix .append ([0 , 0 , 0 , p1 [0 ], p1 [1 ], 1 , - p2 [1 ] * p1 [0 ], - p2 [1 ] * p1 [1 ]])
517
+ b_matrix = torch .tensor (startpoints , dtype = torch .float ).view (8 )
518
+ res = torch .lstsq (b_matrix , a_matrix )[0 ]
511
519
512
- A = torch .tensor (matrix , dtype = torch .float )
513
- B = torch .tensor (startpoints , dtype = torch .float ).view (8 )
514
- res = torch .lstsq (B , A )[0 ]
515
- return res .squeeze_ (1 ).tolist ()
520
+ output : List [float ] = res .squeeze (1 ).tolist ()
521
+ return output
516
522
517
523
518
def perspective(
        img: Tensor,
        startpoints: List[List[int]],
        endpoints: List[List[int]],
        interpolation: int = 2,
        fill: Optional[int] = None
) -> Tensor:
    """Perform perspective transform of the given image.
    The image can be a PIL Image or a Tensor, in which case it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        img (PIL Image or Tensor): Image to be transformed.
        startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
        endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
        interpolation (int): Interpolation type. If input is Tensor, only ``PIL.Image.NEAREST`` and
            ``PIL.Image.BILINEAR`` are supported. Default, ``PIL.Image.BILINEAR`` for PIL images and Tensors.
        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
            image. If int or float, the value is used for all bands respectively.
            This option is only available for ``pillow>=5.0.0``. This option is not supported for Tensor
            input. Fill value for the area outside the transform in the output image is always 0.

    Returns:
        PIL Image or Tensor: transformed Image.
    """
    # Solve for the homography once, then dispatch on the input type.
    perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)

    if isinstance(img, torch.Tensor):
        return F_t.perspective(img, perspective_coeffs, interpolation=interpolation, fill=fill)

    return F_pil.perspective(img, perspective_coeffs, interpolation=interpolation, fill=fill)
541
558
542
559
543
560
def vflip (img : Tensor ) -> Tensor :
0 commit comments