diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 00d929d0675..c2e9855d9e8 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -131,6 +131,7 @@ Color ColorJitter v2.ColorJitter + v2.RandomPhotometricDistort Grayscale v2.Grayscale RandomGrayscale diff --git a/torchvision/transforms/v2/_color.py b/torchvision/transforms/v2/_color.py index 785a3965e60..2a581bf5640 100644 --- a/torchvision/transforms/v2/_color.py +++ b/torchvision/transforms/v2/_color.py @@ -46,7 +46,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomGrayscale(_RandomApplyTransform): - """[BETA] Randomly convert image to grayscale with a probability of p (default 0.1). + """[BETA] Randomly convert image or video to grayscale with a probability of p (default 0.1). .. betastatus:: RandomGrayscale transform @@ -85,7 +85,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class ColorJitter(Transform): - """[BETA] Randomly change the brightness, contrast, saturation and hue of an image. + """[BETA] Randomly change the brightness, contrast, saturation and hue of an image or video. .. betastatus:: ColorJitter transform @@ -190,6 +190,31 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: # TODO: This class seems to be untested class RandomPhotometricDistort(Transform): + """[BETA] Randomly distorts the image or video as used in `SSD: Single Shot + MultiBox Detector <https://arxiv.org/abs/1512.02325>`_. + + .. betastatus:: RandomPhotometricDistort transform + + This transform relies on :class:`~torchvision.transforms.v2.ColorJitter` + under the hood to adjust the contrast, saturation, hue, brightness, and also + randomly permutes channels. + + Args: + brightness (tuple of float (min, max), optional): How much to jitter brightness. + brightness_factor is chosen uniformly from [min, max]. Should be non-negative numbers. + contrast (tuple of float (min, max), optional): How much to jitter contrast. 
+ contrast_factor is chosen uniformly from [min, max]. Should be non-negative numbers. + saturation (tuple of float (min, max), optional): How much to jitter saturation. + saturation_factor is chosen uniformly from [min, max]. Should be non-negative numbers. + hue (tuple of float (min, max), optional): How much to jitter hue. + hue_factor is chosen uniformly from [min, max]. Should have -0.5 <= min <= max <= 0.5. + To jitter hue, the pixel values of the input image have to be non-negative for conversion to HSV space; + thus it does not work if you normalize your image to an interval with negative values, + or use an interpolation that generates negative values before using this function. + p (float, optional): probability of each distortion operation (contrast, saturation, ...) being applied. + Default is 0.5. + """ + _transformed_types = ( datapoints.Image, PIL.Image.Image, @@ -199,10 +224,10 @@ class RandomPhotometricDistort(Transform): def __init__( self, + brightness: Tuple[float, float] = (0.875, 1.125), contrast: Tuple[float, float] = (0.5, 1.5), saturation: Tuple[float, float] = (0.5, 1.5), hue: Tuple[float, float] = (-0.05, 0.05), - brightness: Tuple[float, float] = (0.875, 1.125), p: float = 0.5, ): super().__init__() @@ -266,7 +291,7 @@ def _transform( class RandomEqualize(_RandomApplyTransform): - """[BETA] Equalize the histogram of the given image randomly with a given probability. + """[BETA] Equalize the histogram of the given image or video with a given probability. .. betastatus:: RandomEqualize transform @@ -285,7 +310,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomInvert(_RandomApplyTransform): - """[BETA] Inverts the colors of the given image randomly with a given probability. + """[BETA] Inverts the colors of the given image or video with a given probability. .. 
betastatus:: RandomInvert transform @@ -304,7 +329,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomPosterize(_RandomApplyTransform): - """[BETA] Posterize the image randomly with a given probability by reducing the + """[BETA] Posterize the image or video with a given probability by reducing the number of bits for each color channel. .. betastatus:: RandomPosterize transform @@ -329,7 +354,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomSolarize(_RandomApplyTransform): - """[BETA] Solarize the image randomly with a given probability by inverting all pixel + """[BETA] Solarize the image or video with a given probability by inverting all pixel values above a threshold. .. betastatus:: RandomSolarize transform @@ -354,7 +379,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomAutocontrast(_RandomApplyTransform): - """[BETA] Autocontrast the pixels of the given image randomly with a given probability. + """[BETA] Autocontrast the pixels of the given image or video with a given probability. .. betastatus:: RandomAutocontrast transform @@ -373,7 +398,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomAdjustSharpness(_RandomApplyTransform): - """[BETA] Adjust the sharpness of the image randomly with a given probability. + """[BETA] Adjust the sharpness of the image or video with a given probability. .. betastatus:: RandomAdjustSharpness transform