
Commit cf66708

Author: zyan3

[video transforms] in ToTensorVideo, divide value by 255.0
1 parent 406efa4 commit cf66708

File tree

3 files changed: +8 / -8 lines


test/test_transforms_video.py

Lines changed: 3 additions & 3 deletions
@@ -19,7 +19,7 @@ def test_random_crop_video(self):
         width = random.randint(10, 32) * 2
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
-        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8)
         result = transforms.Compose([
             transforms.ToTensorVideo(),
             transforms.RandomCropVideo((oheight, owidth)),
@@ -35,7 +35,7 @@ def test_random_resized_crop_video(self):
         width = random.randint(10, 32) * 2
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
-        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8)
         result = transforms.Compose([
             transforms.ToTensorVideo(),
             transforms.RandomResizedCropVideo((oheight, owidth)),
@@ -52,7 +52,7 @@ def test_center_crop_video(self):
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2

-        clip = torch.ones([numFrames, height, width, 3], dtype=torch.uint8)
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8) * 255
         oh1 = (height - oheight) // 2
         ow1 = (width - owidth) // 2
         clipNarrow = clip[:, oh1:oh1 + oheight, ow1:ow1 + owidth, :]
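Why the fixtures changed (my reading; the commit message does not spell it out): once to_tensor divides by 255.0, an all-ones uint8 clip collapses to a single tiny float value, so every crop of it looks identical and the assertions lose their signal; random integers (or a constant 255) give the crops distinguishable content. A quick standalone check of that effect, assuming only torch:

import torch

# All-ones clip: after the new 1/255 scaling every element is ~0.0039,
# so equality checks between different crops pass trivially.
ones = torch.ones((2, 4, 4, 3), dtype=torch.uint8).float() / 255.0
print(ones.unique())              # tensor([0.0039])

# Random-valued clip: varied values actually exercise the crop logic.
rand = torch.randint(0, 256, (2, 4, 4, 3), dtype=torch.uint8).float() / 255.0
print(rand.unique().numel() > 1)  # True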

torchvision/transforms/functional_video.py

Lines changed: 3 additions & 2 deletions
@@ -59,7 +59,8 @@ def center_crop(clip, crop_size):


 def to_tensor(clip):
     """
-    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimenions of clip tensor
     Args:
         clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
     Return:
@@ -68,7 +69,7 @@ def to_tensor(clip):
     _is_tensor_video_clip(clip)
     if not clip.dtype == torch.uint8:
         raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
-    return clip.float().permute(3, 0, 1, 2)
+    return clip.float().permute(3, 0, 1, 2) / 255.0


 def normalize(clip, mean, std, inplace=False):
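A minimal sketch of the new to_tensor behavior after this commit (assuming the module is importable as torchvision.transforms.functional_video, per the file path above):

import torch
from torchvision.transforms import functional_video as F

clip = torch.randint(0, 256, (8, 16, 16, 3), dtype=torch.uint8)  # (T, H, W, C)
out = F.to_tensor(clip)

print(out.shape)   # torch.Size([3, 8, 16, 16]) -- permuted to (C, T, H, W)
print(out.dtype)   # torch.float32
print(float(out.min()), float(out.max()))  # both within [0.0, 1.0] after / 255.0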

torchvision/transforms/transforms_video.py

Lines changed: 2 additions & 3 deletions
@@ -126,15 +126,15 @@ def __repr__(self):

 class ToTensorVideo(object):
     """
-    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimenions of clip tensor
     """

     def __init__(self):
         pass

     def __call__(self, clip):
         """
-        Convert tensor data type to be float and permute the dimenions of clip tensor
         Args:
             clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
         Return:
@@ -157,7 +157,6 @@ def __init__(self, p=0.5):

     def __call__(self, clip):
         """
-        Convert tensor data type to be float and permute the dimenions of clip tensor
         Args:
             clip (torch.tensor): Size is (C, T, H, W)
         Return:
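For context, a hedged end-to-end sketch of ToTensorVideo inside a Compose pipeline, using only names that appear in the test diff above (transforms.Compose, transforms.ToTensorVideo, transforms.RandomCropVideo); the shapes are illustrative:

import torch
import torchvision.transforms as transforms

pipeline = transforms.Compose([
    transforms.ToTensorVideo(),           # uint8 (T, H, W, C) -> float (C, T, H, W) in [0, 1]
    transforms.RandomCropVideo((8, 8)),   # crop every frame to 8x8
])

clip = torch.randint(0, 256, (4, 16, 16, 3), dtype=torch.uint8)
out = pipeline(clip)
print(out.shape)  # torch.Size([3, 4, 8, 8])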
