From 800b9402ec2541aa8fa7aad187499cffe6981638 Mon Sep 17 00:00:00 2001
From: Guillem Orellana Trullols
Date: Tue, 5 May 2020 20:44:01 +0200
Subject: [PATCH 1/4] Update ucf101.py

Currently the dataset does not work properly because of this line of code:
`indices = [i for i in range(len(video_list)) if video_list[i][len(self.root) + 1:] in selected_files]`.
Slicing with `len(self.root) + 1` only makes sense if there is no trailing `/` on `root`:

```
>>> root = 'data/ucf-101/videos'
>>> video_path = 'data/ucf-101/videos/activity/video.avi'
>>> video_path[len(root):]
'/activity/video.avi'
>>> video_path[len(root) + 1:]
'activity/video.avi'
```

Joining the root path onto the selected files as well is a simple solution and makes the dataset work both with and without a trailing slash.
---
 torchvision/datasets/ucf101.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/ucf101.py b/torchvision/datasets/ucf101.py
index 43d8124bd4b..464eb0018f2 100644
--- a/torchvision/datasets/ucf101.py
+++ b/torchvision/datasets/ucf101.py
@@ -88,10 +88,10 @@ def _select_fold(self, video_list, annotation_path, fold, train):
         with open(f, "r") as fid:
             data = fid.readlines()
             data = [x.strip().split(" ") for x in data]
-            data = [x[0] for x in data]
+            data = [os.path.join(self.root, x[0]) for x in data]
             selected_files.extend(data)
         selected_files = set(selected_files)
-        indices = [i for i in range(len(video_list)) if video_list[i][len(self.root) + 1:] in selected_files]
+        indices = [i for i in range(len(video_list)) if video_list[i] in selected_files]
         return indices
 
     def __len__(self):
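For reference, a small sketch of why the joined paths match regardless of a trailing slash on `root`. The paths and names below are illustrative stand-ins, not taken from the patch, and assume POSIX-style separators as in the commit message example:

```python
import os

# Illustrative stand-ins for what make_dataset and the annotation file provide
video_list = ['data/ucf-101/videos/activity/video.avi']
annotation_entries = ['activity/video.avi']

for root in ('data/ucf-101/videos', 'data/ucf-101/videos/'):
    # The patched _select_fold joins root onto each annotation entry...
    selected_files = {os.path.join(root, entry) for entry in annotation_entries}
    # ...so the full paths produced by make_dataset match whether or not
    # root carries a trailing slash
    indices = [i for i, path in enumerate(video_list) if path in selected_files]
    print(root, indices)  # both iterations print [0]
```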
From d0a27086732cffa9d01cbac52d4f3f3da995e061 Mon Sep 17 00:00:00 2001
From: Guillem96
Date: Wed, 15 Jul 2020 16:25:13 +0200
Subject: [PATCH 2/4] Making UCF101 dataset loading more efficient

---
 torchvision/datasets/ucf101.py | 51 +++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/torchvision/datasets/ucf101.py b/torchvision/datasets/ucf101.py
index 464eb0018f2..bb3fd46a96d 100644
--- a/torchvision/datasets/ucf101.py
+++ b/torchvision/datasets/ucf101.py
@@ -1,4 +1,3 @@
-import glob
 import os
 
 from .utils import list_dir
@@ -50,17 +49,28 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
         if not 1 <= fold <= 3:
             raise ValueError("fold should be between 1 and 3, got {}".format(fold))
 
-        extensions = ('avi',)
         self.fold = fold
         self.train = train
+        self.transform = transform
 
-        classes = list(sorted(list_dir(root)))
-        class_to_idx = {classes[i]: i for i in range(len(classes))}
-        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
-        self.classes = classes
-        video_list = [x[0] for x in self.samples]
+        # Create class to index mapping with sorted class names
+        self.classes = list(sorted(list_dir(root)))
+        class_to_idx = {c: i for i, c in enumerate(self.classes)}
+
+        # Iterate through root directory to retrieve the path and the labels
+        # for each dataset example
+        self.samples = make_dataset(
+            self.root, class_to_idx, ('avi',), is_valid_file=None)
+
+        # Get the video paths that belong to the selected fold and split
+        _video_paths_in_fold = self._fold_paths(annotation_path, fold, train)
+        # Filter the dataset samples so only the video paths belonging to the
+        # selected fold are processed
+        self.samples = [o for o in self.samples if o[0] in _video_paths_in_fold]
+
+        # At this point, only the needed videos' path are selected
         video_clips = VideoClips(
-            video_list,
+            [x[0] for x in self.samples],
             frames_per_clip,
             step_between_clips,
             frame_rate,
@@ -72,34 +82,29 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
             _audio_samples=_audio_samples,
         )
         self.video_clips_metadata = video_clips.metadata
-        self.indices = self._select_fold(video_list, annotation_path, fold, train)
-        self.video_clips = video_clips.subset(self.indices)
-        self.transform = transform
 
     @property
     def metadata(self):
         return self.video_clips_metadata
 
-    def _select_fold(self, video_list, annotation_path, fold, train):
-        name = "train" if train else "test"
-        name = "{}list{:02d}.txt".format(name, fold)
+    def _fold_paths(self, annotation_path, fold, train):
+        split = 'train' if train else 'test'
+        name = f'{split}list{fold:02d}.txt'
         f = os.path.join(annotation_path, name)
-        selected_files = []
+
         with open(f, "r") as fid:
-            data = fid.readlines()
-            data = [x.strip().split(" ") for x in data]
-            data = [os.path.join(self.root, x[0]) for x in data]
-            selected_files.extend(data)
-        selected_files = set(selected_files)
-        indices = [i for i in range(len(video_list)) if video_list[i] in selected_files]
-        return indices
+            video_files = fid.readlines()
+            video_files = [o.strip().split(" ")[0] for o in video_files]
+            video_files = [os.path.join(self.root, o) for o in video_files]
+            video_files = set(video_files)
+        return video_files
 
     def __len__(self):
         return self.video_clips.num_clips()
 
     def __getitem__(self, idx):
         video, audio, info, video_idx = self.video_clips.get_clip(idx)
-        label = self.samples[self.indices[video_idx]][1]
+        label = self.samples[1]
 
         if self.transform is not None:
             video = self.transform(video)

From f91ec95fb4196a698b8333bd4cd9fd864d46af2e Mon Sep 17 00:00:00 2001
From: Guillem96
Date: Wed, 15 Jul 2020 16:46:53 +0200
Subject: [PATCH 3/4] Fixing error in __getitem__

---
 torchvision/datasets/ucf101.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/ucf101.py b/torchvision/datasets/ucf101.py
index bb3fd46a96d..38d76373bf6 100644
--- a/torchvision/datasets/ucf101.py
+++ b/torchvision/datasets/ucf101.py
@@ -69,7 +69,7 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
         self.samples = [o for o in self.samples if o[0] in _video_paths_in_fold]
 
         # At this point, only the needed videos' path are selected
-        video_clips = VideoClips(
+        self.video_clips = VideoClips(
             [x[0] for x in self.samples],
             frames_per_clip,
             step_between_clips,
@@ -81,7 +81,7 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
             _video_min_dimension=_video_min_dimension,
             _audio_samples=_audio_samples,
         )
-        self.video_clips_metadata = video_clips.metadata
+        self.video_clips_metadata = self.video_clips.metadata
 
     @property
     def metadata(self):
@@ -104,7 +104,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
         video, audio, info, video_idx = self.video_clips.get_clip(idx)
-        label = self.samples[1]
+        label = self.samples[video_idx][1]
 
         if self.transform is not None:
             video = self.transform(video)
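After these two patches, `self.samples` and the video list handed to `VideoClips` are built from the same filtered list in the same order, so the `video_idx` returned by `get_clip` indexes straight into `self.samples` for the label. A minimal usage sketch, where the local paths and clip parameters are made up for illustration:

```python
from torchvision.datasets import UCF101

# Hypothetical local layout: videos under root/<class>/<video>.avi and the
# official train/test split files under annotation_path
dataset = UCF101(
    root='data/ucf-101/videos',
    annotation_path='data/ucf-101/annotations',
    frames_per_clip=16,
    step_between_clips=16,
    fold=1,
    train=True,
)

# Each item is (video, audio, label); the label comes from
# self.samples[video_idx][1], which lines up with the filtered sample list
video, audio, label = dataset[0]
print(video.shape, label)  # e.g. a [T, H, W, C] tensor and a class index
```

Note that constructing the dataset still has to index the clips of the selected fold's videos, but unlike before, videos from the other folds are no longer processed at all.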
From 01c1f7ead363e8a290a0b8d7bd55d737f8c1a51e Mon Sep 17 00:00:00 2001
From: Guillem96
Date: Fri, 31 Jul 2020 12:55:04 +0200
Subject: [PATCH 4/4] Fix linter issues

---
 torchvision/datasets/ucf101.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/ucf101.py b/torchvision/datasets/ucf101.py
index 38d76373bf6..18ce07d0801 100644
--- a/torchvision/datasets/ucf101.py
+++ b/torchvision/datasets/ucf101.py
@@ -56,15 +56,15 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
         # Create class to index mapping with sorted class names
         self.classes = list(sorted(list_dir(root)))
         class_to_idx = {c: i for i, c in enumerate(self.classes)}
-        
+
         # Iterate through root directory to retrieve the path and the labels
         # for each dataset example
         self.samples = make_dataset(
             self.root, class_to_idx, ('avi',), is_valid_file=None)
-        
+
         # Get the video paths that belong to the selected fold and split
         _video_paths_in_fold = self._fold_paths(annotation_path, fold, train)
-        # Filter the dataset samples so only the video paths belonging to the 
+        # Filter the dataset samples so only the video paths belonging to the
         # selected fold are processed
         self.samples = [o for o in self.samples if o[0] in _video_paths_in_fold]
 