diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
index 1fe3e78f55f..6957e79bbfa 100644
--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -192,6 +192,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBox
     v2.ClampBoundingBox
+    v2.UniformTemporalSubsample
 
 .. _conversion_transforms:
 
diff --git a/torchvision/transforms/v2/_temporal.py b/torchvision/transforms/v2/_temporal.py
index b26d6b0450f..ad7526bc4a4 100644
--- a/torchvision/transforms/v2/_temporal.py
+++ b/torchvision/transforms/v2/_temporal.py
@@ -7,6 +7,19 @@
 
 
 class UniformTemporalSubsample(Transform):
+    """[BETA] Uniformly subsample ``num_samples`` indices from the temporal dimension of the video.
+
+    .. betastatus:: UniformTemporalSubsample transform
+
+    Videos are expected to be of shape ``[..., T, C, H, W]`` where ``T`` denotes the temporal dimension.
+
+    When ``num_samples`` is larger than the size of the temporal dimension of the video, it
+    will sample frames based on nearest-neighbor interpolation.
+
+    Args:
+        num_samples (int): The number of equispaced samples to be selected.
+    """
+
     _transformed_types = (is_simple_tensor, datapoints.Video)
 
     def __init__(self, num_samples: int):
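
For reviewers, here is a minimal usage sketch of the transform being documented. It is not part of the patch; it assumes the class is exposed as ``torchvision.transforms.v2.UniformTemporalSubsample`` (consistent with the ``v2.UniformTemporalSubsample`` entry added to ``transforms.rst``) and that a plain tensor of shape ``[..., T, C, H, W]`` is an accepted input, as the docstring states.

```python
import torch
from torchvision.transforms import v2

# Dummy video of shape [T, C, H, W]: 32 frames of 3 x 224 x 224.
video = torch.randint(0, 256, (32, 3, 224, 224), dtype=torch.uint8)

# Keep 8 equispaced frames along the temporal dimension T.
subsample = v2.UniformTemporalSubsample(num_samples=8)
out = subsample(video)

print(out.shape)  # torch.Size([8, 3, 224, 224])
```

If ``num_samples`` exceeded ``T`` here (e.g. 64 for a 32-frame clip), frames would be repeated via nearest-neighbor index selection, per the behavior described in the docstring.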