diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 1dec6bedf15..8e3c60085de 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -191,6 +191,7 @@ Miscellaneous Lambda v2.Lambda v2.SanitizeBoundingBox + v2.ClampBoundingBox .. _conversion_transforms: @@ -212,6 +213,7 @@ Conversion v2.ConvertImageDtype v2.ConvertDtype v2.ToDtype + v2.ConvertBoundingBoxFormat Auto-Augmentation ----------------- diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 7d0f0ec39f9..94ec851d045 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -9,6 +9,15 @@ class ConvertBoundingBoxFormat(Transform): + """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY". + + .. betastatus:: ConvertBoundingBoxFormat transform + + Args: + format (str or datapoints.BoundingBoxFormat): output bounding box format. + Possible values are defined by :class:`~torchvision.datapoints.BoundingBoxFormat` and + string values match the enums, e.g. "XYXY" or "XYWH" etc. + """ _transformed_types = (datapoints.BoundingBox,) def __init__(self, format: Union[str, datapoints.BoundingBoxFormat]) -> None: @@ -22,7 +31,7 @@ def _transform(self, inpt: datapoints.BoundingBox, params: Dict[str, Any]) -> da class ConvertDtype(Transform): - """[BETA] Convert a tensor image/box/mask to the given ``dtype`` and scale the values accordingly + """[BETA] Convert input image or video to the given ``dtype`` and scale the values accordingly. .. betastatus:: ConvertDtype transform @@ -63,6 +72,13 @@ def _transform( class ClampBoundingBox(Transform): + """[BETA] Clamp bounding boxes to their corresponding image dimensions. + + The clamping is done according to the bounding boxes' ``spatial_size`` meta-data. + + .. betastatus:: ClampBoundingBox transform + + """ _transformed_types = (datapoints.BoundingBox,) def _transform(self, inpt: datapoints.BoundingBox, params: Dict[str, Any]) -> datapoints.BoundingBox: