diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
index 1fe3e78f55f..6957e79bbfa 100644
--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -192,6 +192,7 @@ Miscellaneous
     v2.Lambda
     v2.SanitizeBoundingBox
     v2.ClampBoundingBox
+    v2.UniformTemporalSubsample
 
 .. _conversion_transforms:
 
diff --git a/torchvision/transforms/v2/_temporal.py b/torchvision/transforms/v2/_temporal.py
index b26d6b0450f..ad7526bc4a4 100644
--- a/torchvision/transforms/v2/_temporal.py
+++ b/torchvision/transforms/v2/_temporal.py
@@ -7,6 +7,19 @@
 
 
 class UniformTemporalSubsample(Transform):
+    """[BETA] Uniformly subsample ``num_samples`` indices from the temporal dimension of the video.
+
+    .. betastatus:: UniformTemporalSubsample transform
+
+    Videos are expected to be of shape ``[..., T, C, H, W]`` where ``T`` denotes the temporal dimension.
+
+    When ``num_samples`` is larger than the size of the temporal dimension of the video, it
+    will sample frames based on nearest-neighbor interpolation.
+
+    Args:
+        num_samples (int): The number of equispaced samples to be selected.
+    """
+
     _transformed_types = (is_simple_tensor, datapoints.Video)
 
     def __init__(self, num_samples: int):
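
For reviewers, here is a minimal usage sketch of the transform being documented. It is not part of the patch; it assumes the class is exposed as ``torchvision.transforms.v2.UniformTemporalSubsample`` (consistent with the ``v2.UniformTemporalSubsample`` entry added to ``transforms.rst``) and that a plain tensor of shape ``[..., T, C, H, W]`` is an accepted input, as the docstring states.

```python
import torch
from torchvision.transforms import v2

# Dummy video of shape [T, C, H, W]: 32 frames of 3 x 224 x 224.
video = torch.randint(0, 256, (32, 3, 224, 224), dtype=torch.uint8)

# Keep 8 equispaced frames along the temporal dimension T.
subsample = v2.UniformTemporalSubsample(num_samples=8)
out = subsample(video)

print(out.shape)  # torch.Size([8, 3, 224, 224])
```

If ``num_samples`` exceeded ``T`` here (e.g. 64 for a 32-frame clip), frames would be repeated via nearest-neighbor index selection, per the behavior described in the docstring.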