Skip to content

Commit 928b05c

Browse files
NicolasHug, vfdev-5, pmeier
authored
Added docs for v2 transforms (part 1) (#7297)
Co-authored-by: vfdev <[email protected]> Co-authored-by: Philip Meier <[email protected]>
1 parent d03b776 commit 928b05c

File tree

11 files changed

+850
-2
lines changed

11 files changed

+850
-2
lines changed

docs/source/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
sys.path.append(os.path.abspath("."))
3535

36+
torchvision.disable_beta_transforms_warning()
37+
3638
# -- General configuration ------------------------------------------------
3739

3840
# Required version of sphinx is set from docs/requirements.txt

docs/source/transforms.rst

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,17 +98,29 @@ Geometry
9898
:template: class.rst
9999

100100
Resize
101+
v2.Resize
101102
RandomCrop
103+
v2.RandomCrop
102104
RandomResizedCrop
105+
v2.RandomResizedCrop
103106
CenterCrop
107+
v2.CenterCrop
104108
FiveCrop
109+
v2.FiveCrop
105110
TenCrop
111+
v2.TenCrop
106112
Pad
113+
v2.Pad
107114
RandomAffine
115+
v2.RandomAffine
108116
RandomPerspective
117+
v2.RandomPerspective
109118
RandomRotation
119+
v2.RandomRotation
110120
RandomHorizontalFlip
121+
v2.RandomHorizontalFlip
111122
RandomVerticalFlip
123+
v2.RandomVerticalFlip
112124

113125
Color
114126
-----
@@ -118,15 +130,25 @@ Color
118130
:template: class.rst
119131

120132
ColorJitter
133+
v2.ColorJitter
121134
Grayscale
135+
v2.Grayscale
122136
RandomGrayscale
137+
v2.RandomGrayscale
123138
GaussianBlur
139+
v2.GaussianBlur
124140
RandomInvert
141+
v2.RandomInvert
125142
RandomPosterize
143+
v2.RandomPosterize
126144
RandomSolarize
145+
v2.RandomSolarize
127146
RandomAdjustSharpness
147+
v2.RandomAdjustSharpness
128148
RandomAutocontrast
149+
v2.RandomAutocontrast
129150
RandomEqualize
151+
v2.RandomEqualize
130152

131153
Composition
132154
-----------
@@ -136,9 +158,13 @@ Composition
136158
:template: class.rst
137159

138160
Compose
161+
v2.Compose
139162
RandomApply
163+
v2.RandomApply
140164
RandomChoice
165+
v2.RandomChoice
141166
RandomOrder
167+
v2.RandomOrder
142168

143169
Miscellaneous
144170
-------------
@@ -148,9 +174,13 @@ Miscellaneous
148174
:template: class.rst
149175

150176
LinearTransformation
177+
v2.LinearTransformation
151178
Normalize
179+
v2.Normalize
152180
RandomErasing
181+
v2.RandomErasing
153182
Lambda
183+
v2.Lambda
154184

155185
.. _conversion_transforms:
156186

@@ -162,9 +192,15 @@ Conversion
162192
:template: class.rst
163193

164194
ToPILImage
195+
v2.ToPILImage
196+
v2.ToImagePIL
165197
ToTensor
198+
v2.ToTensor
166199
PILToTensor
200+
v2.PILToTensor
167201
ConvertImageDtype
202+
v2.ConvertImageDtype
203+
v2.ConvertDtype
168204

169205
Auto-Augmentation
170206
-----------------
@@ -181,9 +217,13 @@ The new transform can be used standalone or mixed-and-matched with existing tran
181217

182218
AutoAugmentPolicy
183219
AutoAugment
220+
v2.AutoAugment
184221
RandAugment
222+
v2.RandAugment
185223
TrivialAugmentWide
224+
v2.TrivialAugmentWide
186225
AugMix
226+
v2.AugMix
187227

188228
.. _functional_transforms:
189229

torchvision/transforms/v2/_augment.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,38 @@
1313

1414

1515
class RandomErasing(_RandomApplyTransform):
16+
"""[BETA] Randomly selects a rectangle region in the input image or video and erases its pixels.
17+
18+
.. betastatus:: RandomErasing transform
19+
20+
This transform does not support PIL Image.
21+
'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896
22+
23+
Args:
24+
p: probability that the random erasing operation will be performed.
25+
scale: range of proportion of erased area against input image.
26+
ratio: range of aspect ratio of erased area.
27+
value: erasing value. Default is 0. If a single int, it is used to
28+
erase all pixels. If a tuple of length 3, it is used to erase
29+
R, G, B channels respectively.
30+
If the str 'random', each pixel is erased with random values.
31+
inplace: boolean to make this transform inplace. Default set to False.
32+
33+
Returns:
34+
Erased input.
35+
36+
Example:
37+
>>> import torch
>>> from torchvision.transforms import v2 as transforms
38+
>>>
39+
>>> transform = transforms.Compose([
40+
>>> transforms.RandomHorizontalFlip(),
41+
>>> transforms.PILToTensor(),
42+
>>> transforms.ConvertImageDtype(torch.float),
43+
>>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
44+
>>> transforms.RandomErasing(),
45+
>>> ])
46+
"""
47+
1648
_v1_transform_cls = _transforms.RandomErasing
1749

1850
def _extract_params_for_v1_transform(self) -> Dict[str, Any]:

torchvision/transforms/v2/_auto_augment.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,24 @@ def _apply_image_or_video_transform(
162162

163163

164164
class AutoAugment(_AutoAugmentBase):
165+
r"""[BETA] AutoAugment data augmentation method based on
166+
`"AutoAugment: Learning Augmentation Strategies from Data" <https://arxiv.org/pdf/1805.09501.pdf>`_.
167+
168+
.. betastatus:: AutoAugment transform
169+
170+
If the image is a torch Tensor, it should be of type torch.uint8, and it is expected
171+
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
172+
If the image is a PIL Image, it is expected to be in mode "L" or "RGB".
173+
174+
Args:
175+
policy (AutoAugmentPolicy): Desired policy enum defined by
176+
:class:`torchvision.transforms.autoaugment.AutoAugmentPolicy`. Default is ``AutoAugmentPolicy.IMAGENET``.
177+
interpolation (InterpolationMode): Desired interpolation enum defined by
178+
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
179+
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
180+
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
181+
image. If given a number, the value is used for all bands respectively.
182+
"""
165183
_v1_transform_cls = _transforms.AutoAugment
166184

167185
_AUGMENTATION_SPACE = {
@@ -318,6 +336,27 @@ def forward(self, *inputs: Any) -> Any:
318336

319337

320338
class RandAugment(_AutoAugmentBase):
339+
r"""[BETA] RandAugment data augmentation method based on
340+
`"RandAugment: Practical automated data augmentation with a reduced search space"
341+
<https://arxiv.org/abs/1909.13719>`_.
342+
343+
.. betastatus:: RandAugment transform
344+
345+
If the image is a torch Tensor, it should be of type torch.uint8, and it is expected
346+
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
347+
If the image is a PIL Image, it is expected to be in mode "L" or "RGB".
348+
349+
Args:
350+
num_ops (int): Number of augmentation transformations to apply sequentially.
351+
magnitude (int): Magnitude for all the transformations.
352+
num_magnitude_bins (int): The number of different magnitude values.
353+
interpolation (InterpolationMode): Desired interpolation enum defined by
354+
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
355+
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
356+
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
357+
image. If given a number, the value is used for all bands respectively.
358+
"""
359+
321360
_v1_transform_cls = _transforms.RandAugment
322361
_AUGMENTATION_SPACE = {
323362
"Identity": (lambda num_bins, height, width: None, False),
@@ -379,6 +418,24 @@ def forward(self, *inputs: Any) -> Any:
379418

380419

381420
class TrivialAugmentWide(_AutoAugmentBase):
421+
r"""[BETA] Dataset-independent data-augmentation with TrivialAugment Wide, as described in
422+
`"TrivialAugment: Tuning-free Yet State-of-the-Art Data Augmentation" <https://arxiv.org/abs/2103.10158>`_.
423+
424+
.. betastatus:: TrivialAugmentWide transform
425+
426+
If the image is a torch Tensor, it should be of type torch.uint8, and it is expected
427+
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
428+
If the image is a PIL Image, it is expected to be in mode "L" or "RGB".
429+
430+
Args:
431+
num_magnitude_bins (int): The number of different magnitude values.
432+
interpolation (InterpolationMode): Desired interpolation enum defined by
433+
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
434+
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
435+
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
436+
image. If given a number, the value is used for all bands respectively.
437+
"""
438+
382439
_v1_transform_cls = _transforms.TrivialAugmentWide
383440
_AUGMENTATION_SPACE = {
384441
"Identity": (lambda num_bins, height, width: None, False),
@@ -430,6 +487,29 @@ def forward(self, *inputs: Any) -> Any:
430487

431488

432489
class AugMix(_AutoAugmentBase):
490+
r"""[BETA] AugMix data augmentation method based on
491+
`"AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty" <https://arxiv.org/abs/1912.02781>`_.
492+
493+
.. betastatus:: AugMix transform
494+
495+
If the image is a torch Tensor, it should be of type torch.uint8, and it is expected
496+
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
497+
If the image is a PIL Image, it is expected to be in mode "L" or "RGB".
498+
499+
Args:
500+
severity (int): The severity of base augmentation operators. Default is ``3``.
501+
mixture_width (int): The number of augmentation chains. Default is ``3``.
502+
chain_depth (int): The depth of augmentation chains. A negative value denotes stochastic depth sampled from the interval [1, 3].
503+
Default is ``-1``.
504+
alpha (float): The hyperparameter for the probability distributions. Default is ``1.0``.
505+
all_ops (bool): Use all operations (including brightness, contrast, color and sharpness). Default is ``True``.
506+
interpolation (InterpolationMode): Desired interpolation enum defined by
507+
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
508+
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
509+
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
510+
image. If given a number, the value is used for all bands respectively.
511+
"""
512+
433513
_v1_transform_cls = _transforms.AugMix
434514

435515
_PARTIAL_AUGMENTATION_SPACE = {

0 commit comments

Comments
 (0)