Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions torchvision/prototype/datasets/utils/_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,30 @@ def load(
root = pathlib.Path(root)
path = root / self.file_name
# Instead of the raw file, there might also be files with fewer suffixes after decompression or directories
# with no suffixes at all. Thus, we look for all paths that share the same name without suffixes as the raw
# file.
path_candidates = {file for file in path.parent.glob(path.name.replace("".join(path.suffixes), "") + "*")}
# If we don't find anything, we try to download the raw file.
if not path_candidates:
path_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)}
# with no suffixes at all.
stem = path.name.replace("".join(path.suffixes), "")

# As a first step, we check for a folder with the same stem as the raw file. If it exists, we use it since
# extracted files give the best I/O performance. Note that OnlineResource._extract() makes sure that an archive
# is always extracted into a folder with the corresponding file name.
folder_candidate = path.parent / stem
if folder_candidate.exists() and folder_candidate.is_dir():
return self._loader(folder_candidate)

# If there is no folder, we look for all files that share the same stem as the raw file, but might have a
# different suffix.
file_candidates = {file for file in path.parent.glob(stem + ".*")}
# If we don't find anything, we download the raw file.
if not file_candidates:
file_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)}
# If the only thing we find is the raw file, we use it and optionally perform some preprocessing steps.
if path_candidates == {path}:
if file_candidates == {path}:
if self._preprocess is not None:
path = self._preprocess(path)
# Otherwise we use the path with the fewest suffixes. This gives us the extracted > decompressed > raw priority
# that we want.
# Otherwise, we use the path with the fewest suffixes. This gives us the decompressed > raw priority that we
# want for the best I/O performance.
else:
path = min(path_candidates, key=lambda path: len(path.suffixes))
path = min(file_candidates, key=lambda path: len(path.suffixes))
return self._loader(path)

@abc.abstractmethod
Expand Down