diff --git a/torchvision/io/video.py b/torchvision/io/video.py index 5337e26e396..866fe48274f 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -137,12 +137,14 @@ def _read_from_stream(container, start_offset, end_offset, pts_unit, stream, str pass # ensure that the results are sorted wrt the pts result = [frames[i] for i in sorted(frames) if start_offset <= frames[i].pts <= end_offset] - if start_offset > 0 and start_offset not in frames: + if len(frames) > 0 and start_offset > 0 and start_offset not in frames: # if there is no frame that exactly matches the pts of start_offset # add the last frame smaller than start_offset, to guarantee that # we will have all the necessary data. This is most useful for audio - first_frame_pts = max(i for i in frames if i < start_offset) - result.insert(0, frames[first_frame_pts]) + preceding_frames = [i for i in frames if i < start_offset] + if len(preceding_frames) > 0: + first_frame_pts = max(preceding_frames) + result.insert(0, frames[first_frame_pts]) return result