
Commit aedd397

return features instead of vanilla tensors from prototype datasets (#4864)
* return features instead of vanilla tensors from prototype datasets
* fix tests
* remove inplace
* add explanation for __init_subclass__
* fix label for test split
* relax test
* remove pixels
1 parent 775129b commit aedd397
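
In practice, this change means samples yielded by the prototype datasets carry feature objects (Label, Image, BoundingBox) instead of plain tensors plus separate metadata keys. A minimal consumer-side sketch, assuming load() is exposed as torchvision.prototype.datasets.load and that Label exposes its metadata as attributes (the dataset name and sample keys are illustrative, taken from the diffs below):

from torchvision.prototype import datasets, features

# Default decoders are now looked up per dataset type (see _api.py below).
datapipe = datasets.load("cifar10", split="train")
sample = next(iter(datapipe))

# The label is a features.Label that carries its category string as metadata,
# so there is no longer a separate "category" key in the sample.
assert isinstance(sample["label"], features.Label)
print(sample["label"].category)  # e.g. "airplane"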

11 files changed: +122 −92 lines


test/builtin_dataset_mocks.py

Lines changed: 3 additions & 14 deletions
@@ -14,6 +14,7 @@
 from torch.testing import make_tensor as _make_tensor
 from torchdata.datapipes.iter import IterDataPipe
 from torchvision.prototype import datasets
+from torchvision.prototype.datasets._api import DEFAULT_DECODER_MAP, DEFAULT_DECODER
 from torchvision.prototype.datasets._api import find
 from torchvision.prototype.utils._internal import add_suggestion

@@ -99,28 +100,16 @@ def _get(self, dataset, config):
         self._cache[(name, config)] = mock_resources, mock_info
         return mock_resources, mock_info

-    def _decoder(self, dataset_type):
-        def to_bytes(file):
-            try:
-                return file.read()
-            finally:
-                file.close()
-
-        if dataset_type == datasets.utils.DatasetType.RAW:
-            return datasets.decoder.raw
-        else:
-            return to_bytes
-
     def load(
-        self, name: str, decoder=DEFAULT_TEST_DECODER, split="train", **options: Any
+        self, name: str, decoder=DEFAULT_DECODER, split="train", **options: Any
     ) -> Tuple[IterDataPipe, Dict[str, Any]]:
         dataset = find(name)
         config = dataset.info.make_config(split=split, **options)
         resources, mock_info = self._get(dataset, config)
         datapipe = dataset._make_datapipe(
             [resource.to_datapipe() for resource in resources],
             config=config,
-            decoder=self._decoder(dataset.info.type) if decoder is DEFAULT_TEST_DECODER else decoder,
+            decoder=DEFAULT_DECODER_MAP.get(dataset.info.type) if decoder is DEFAULT_DECODER else decoder,
         )
         return datapipe, mock_info

test/test_prototype_builtin_datasets.py

Lines changed: 45 additions & 36 deletions
@@ -1,52 +1,55 @@
-import functools
 import io

 import builtin_dataset_mocks
 import pytest
 from torchdata.datapipes.iter import IterDataPipe
-from torchvision.prototype import datasets
+from torchvision.prototype import datasets, features
+from torchvision.prototype.datasets._api import DEFAULT_DECODER
 from torchvision.prototype.utils._internal import sequence_to_str


-_loaders = []
-_datasets = []
-
-# TODO: this can be replaced by torchvision.prototype.datasets.list() as soon as all builtin datasets are supported
-TMP = [
-    "mnist",
-    "fashionmnist",
-    "kmnist",
-    "emnist",
-    "qmnist",
-    "cifar10",
-    "cifar100",
-    "caltech256",
-    "caltech101",
-    "imagenet",
-]
-for name in TMP:
-    loader = functools.partial(builtin_dataset_mocks.load, name)
-    _loaders.append(pytest.param(loader, id=name))
-
-    info = datasets.info(name)
-    _datasets.extend(
-        [
-            pytest.param(*loader(**config), id=f"{name}-{'-'.join([str(value) for value in config.values()])}")
-            for config in info._configs
-        ]
-    )
-
-loaders = pytest.mark.parametrize("loader", _loaders)
-builtin_datasets = pytest.mark.parametrize(("dataset", "mock_info"), _datasets)
+def to_bytes(file):
+    try:
+        return file.read()
+    finally:
+        file.close()
+
+
+def dataset_parametrization(*names, decoder=to_bytes):
+    if not names:
+        # TODO: Replace this with torchvision.prototype.datasets.list() as soon as all builtin datasets are supported
+        names = (
+            "mnist",
+            "fashionmnist",
+            "kmnist",
+            "emnist",
+            "qmnist",
+            "cifar10",
+            "cifar100",
+            "caltech256",
+            "caltech101",
+            "imagenet",
+        )
+
+    params = []
+    for name in names:
+        for config in datasets.info(name)._configs:
+            if name == "imagenet" and config.split == "test":
+                print()
+            id = f"{name}-{'-'.join([str(value) for value in config.values()])}"
+            dataset, mock_info = builtin_dataset_mocks.load(name, decoder=decoder, **config)
+            params.append(pytest.param(dataset, mock_info, id=id))
+
+    return pytest.mark.parametrize(("dataset", "mock_info"), params)


 class TestCommon:
-    @builtin_datasets
+    @dataset_parametrization()
     def test_smoke(self, dataset, mock_info):
         if not isinstance(dataset, IterDataPipe):
             raise AssertionError(f"Loading the dataset should return an IterDataPipe, but got {type(dataset)} instead.")

-    @builtin_datasets
+    @dataset_parametrization()
     def test_sample(self, dataset, mock_info):
         try:
             sample = next(iter(dataset))
@@ -59,15 +62,15 @@ def test_sample(self, dataset, mock_info):
         if not sample:
             raise AssertionError("Sample dictionary is empty.")

-    @builtin_datasets
+    @dataset_parametrization()
     def test_num_samples(self, dataset, mock_info):
         num_samples = 0
         for _ in dataset:
             num_samples += 1

         assert num_samples == mock_info["num_samples"]

-    @builtin_datasets
+    @dataset_parametrization()
     def test_decoding(self, dataset, mock_info):
         undecoded_features = {key for key, value in next(iter(dataset)).items() if isinstance(value, io.IOBase)}
         if undecoded_features:
@@ -76,6 +79,12 @@ def test_decoding(self, dataset, mock_info):
                 f"{sequence_to_str(sorted(undecoded_features), separate_last='and ')} were not decoded."
             )

+    @dataset_parametrization(decoder=DEFAULT_DECODER)
+    def test_at_least_one_feature(self, dataset, mock_info):
+        sample = next(iter(dataset))
+        if not any(isinstance(value, features.Feature) for value in sample.values()):
+            raise AssertionError("The sample contained no feature.")
+

 class TestQMNIST:
     @pytest.mark.parametrize(
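
A hedged sketch of how the new helper could parametrize an additional test over a subset of datasets while using the library's per-type default decoders. It relies on the imports and helpers defined in the module above; the chosen dataset names and sample keys are assumptions, not part of this commit:

@dataset_parametrization("cifar10", "cifar100", decoder=DEFAULT_DECODER)
def test_cifar_sample_types(dataset, mock_info):
    # With the default decoders the image should already be decoded into a
    # features.Image, and the label should carry its category string
    # (see the cifar.py hunk further down).
    sample = next(iter(dataset))
    assert isinstance(sample["image"], features.Image)
    assert isinstance(sample["label"], features.Label)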

torchvision/prototype/datasets/_api.py

Lines changed: 5 additions & 5 deletions
@@ -49,9 +49,9 @@ def info(name: str) -> DatasetInfo:
     return find(name).info


-default = object()
+DEFAULT_DECODER = object()

-DEFAULT_DECODER: Dict[DatasetType, Callable[[io.IOBase], torch.Tensor]] = {
+DEFAULT_DECODER_MAP: Dict[DatasetType, Callable[[io.IOBase], torch.Tensor]] = {
     DatasetType.RAW: raw,
     DatasetType.IMAGE: pil,
 }
@@ -60,15 +60,15 @@ def info(name: str) -> DatasetInfo:
 def load(
     name: str,
     *,
-    decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = default,  # type: ignore[assignment]
+    decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = DEFAULT_DECODER,  # type: ignore[assignment]
     split: str = "train",
     **options: Any,
 ) -> IterDataPipe[Dict[str, Any]]:
     name = name.lower()
     dataset = find(name)

-    if decoder is default:
-        decoder = DEFAULT_DECODER.get(dataset.info.type)
+    if decoder is DEFAULT_DECODER:
+        decoder = DEFAULT_DECODER_MAP.get(dataset.info.type)

     config = dataset.info.make_config(split=split, **options)
     root = os.path.join(home(), name)
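
Renaming the module-level default to DEFAULT_DECODER makes the sentinel pattern explicit: it distinguishes "argument omitted" (use the per-type default) from an explicit decoder=None (skip decoding). A generic sketch of that pattern with hypothetical names, independent of the torchvision code above:

_SENTINEL = object()  # unique marker; no caller-supplied value is identical to it

def decode(data, decoder=_SENTINEL):
    if decoder is _SENTINEL:
        # Argument omitted: fall back to a default decoder.
        decoder = bytes.decode
    elif decoder is None:
        # Explicit opt-out: return the data untouched.
        return data
    return decoder(data)

print(decode(b"abc"))        # "abc"  (default decoder applied)
print(decode(b"abc", None))  # b'abc' (decoding skipped)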

torchvision/prototype/datasets/_builtin/caltech.py

Lines changed: 5 additions & 4 deletions
@@ -22,6 +22,7 @@
     DatasetType,
 )
 from torchvision.prototype.datasets.utils._internal import INFINITE_BUFFER_SIZE, read_mat
+from torchvision.prototype.features import Label, BoundingBox


 class Caltech101(Dataset):
@@ -95,8 +96,8 @@ def _collate_and_decode_sample(
         image = decoder(image_buffer) if decoder else image_buffer

         ann = read_mat(ann_buffer)
-        bbox = torch.as_tensor(ann["box_coord"].astype(np.int64))
-        contour = torch.as_tensor(ann["obj_contour"])
+        bbox = BoundingBox(ann["box_coord"].astype(np.int64).squeeze()[[2, 0, 3, 1]], format="xyxy")
+        contour = torch.tensor(ann["obj_contour"].T)

         return dict(
             category=category,
@@ -171,9 +172,9 @@ def _collate_and_decode_sample(

         dir_name = pathlib.Path(path).parent.name
         label_str, category = dir_name.split(".")
-        label = torch.tensor(int(label_str))
+        label = Label(int(label_str), category=category)

-        return dict(label=label, category=category, image=decoder(buffer) if decoder else buffer)
+        return dict(label=label, image=decoder(buffer) if decoder else buffer)

     def _make_datapipe(
         self,
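
For context on the [[2, 0, 3, 1]] indexing: the Caltech-101 annotation files appear to store box_coord as [top, bottom, left, right], so the reindexing rearranges it into xyxy order for the BoundingBox feature. A small worked example of just that reordering (the coordinates are made up):

import numpy as np

box_coord = np.array([[10, 110, 20, 220]])  # assumed order: top, bottom, left, right
xyxy = box_coord.astype(np.int64).squeeze()[[2, 0, 3, 1]]
print(xyxy)  # [ 20  10 220 110] -> x1, y1, x2, y2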

torchvision/prototype/datasets/_builtin/cifar.py

Lines changed: 7 additions & 7 deletions
@@ -28,6 +28,7 @@
     image_buffer_from_array,
     path_comparator,
 )
+from torchvision.prototype.features import Label, Image

 __all__ = ["Cifar10", "Cifar100"]

@@ -65,17 +66,16 @@ def _collate_and_decode(
     ) -> Dict[str, Any]:
         image_array, category_idx = data

-        category = self.categories[category_idx]
-        label = torch.tensor(category_idx)
-
-        image: Union[torch.Tensor, io.BytesIO]
+        image: Union[Image, io.BytesIO]
         if decoder is raw:
-            image = torch.from_numpy(image_array)
+            image = Image(image_array)
         else:
             image_buffer = image_buffer_from_array(image_array.transpose((1, 2, 0)))
-            image = decoder(image_buffer) if decoder else image_buffer
+            image = decoder(image_buffer) if decoder else image_buffer  # type: ignore[assignment]
+
+        label = Label(category_idx, category=self.categories[category_idx])

-        return dict(label=label, category=category, image=image)
+        return dict(image=image, label=label)

     def _make_datapipe(
         self,
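
Note that the returned dict no longer has a separate category entry; the category string now rides on the Label feature itself. A hedged sketch of that construction, mirroring the hunk above (the category list is an illustrative subset, and it is assumed that Label behaves like a scalar tensor that exposes its metadata as an attribute):

from torchvision.prototype.features import Label

categories = ["airplane", "automobile", "bird"]  # illustrative subset of the CIFAR-10 categories
label = Label(2, category=categories[2])

print(int(label))      # 2 -- still usable wherever an integer label was expected
print(label.category)  # "bird"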

torchvision/prototype/datasets/_builtin/imagenet.py

Lines changed: 22 additions & 13 deletions
@@ -21,9 +21,22 @@
     getitem,
     read_mat,
 )
+from torchvision.prototype.features import Label, DEFAULT
 from torchvision.prototype.utils._internal import FrozenMapping


+class ImageNetLabel(Label):
+    wnid: Optional[str]
+
+    @classmethod
+    def _parse_meta_data(
+        cls,
+        category: Optional[str] = DEFAULT,  # type: ignore[assignment]
+        wnid: Optional[str] = DEFAULT,  # type: ignore[assignment]
+    ) -> Dict[str, Tuple[Any, Any]]:
+        return dict(category=(category, None), wnid=(wnid, None))
+
+
 class ImageNet(Dataset):
     def _make_info(self) -> DatasetInfo:
         name = "imagenet"
@@ -78,12 +91,12 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]:

     _TRAIN_IMAGE_NAME_PATTERN = re.compile(r"(?P<wnid>n\d{8})_\d+[.]JPEG")

-    def _collate_train_data(self, data: Tuple[str, io.IOBase]) -> Tuple[Tuple[int, str, str], Tuple[str, io.IOBase]]:
+    def _collate_train_data(self, data: Tuple[str, io.IOBase]) -> Tuple[ImageNetLabel, Tuple[str, io.IOBase]]:
         path = pathlib.Path(data[0])
         wnid = self._TRAIN_IMAGE_NAME_PATTERN.match(path.name).group("wnid")  # type: ignore[union-attr]
         category = self.wnid_to_category[wnid]
-        label = self.categories.index(category)
-        return (label, category, wnid), data
+        label = ImageNetLabel(self.categories.index(category), category=category, wnid=wnid)
+        return label, data

     _VAL_TEST_IMAGE_NAME_PATTERN = re.compile(r"ILSVRC2012_(val|test)_(?P<id>\d{8})[.]JPEG")

@@ -93,31 +106,27 @@ def _val_test_image_key(self, data: Tuple[str, Any]) -> int:

     def _collate_val_data(
         self, data: Tuple[Tuple[int, int], Tuple[str, io.IOBase]]
-    ) -> Tuple[Tuple[int, str, str], Tuple[str, io.IOBase]]:
+    ) -> Tuple[ImageNetLabel, Tuple[str, io.IOBase]]:
         label_data, image_data = data
         _, label = label_data
         category = self.categories[label]
         wnid = self.category_to_wnid[category]
-        return (label, category, wnid), image_data
+        return ImageNetLabel(label, category=category, wnid=wnid), image_data

-    def _collate_test_data(self, data: Tuple[str, io.IOBase]) -> Tuple[Tuple[None, None, None], Tuple[str, io.IOBase]]:
-        return (None, None, None), data
+    def _collate_test_data(self, data: Tuple[str, io.IOBase]) -> Tuple[None, Tuple[str, io.IOBase]]:
+        return None, data

     def _collate_and_decode_sample(
         self,
-        data: Tuple[Tuple[Optional[int], Optional[str], Optional[str]], Tuple[str, io.IOBase]],
+        data: Tuple[Optional[ImageNetLabel], Tuple[str, io.IOBase]],
         *,
         decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
     ) -> Dict[str, Any]:
-        ann_data, image_data = data
-        label, category, wnid = ann_data
-        path, buffer = image_data
+        label, (path, buffer) = data
         return dict(
             path=path,
             image=decoder(buffer) if decoder else buffer,
             label=label,
-            category=category,
-            wnid=wnid,
         )

     def _make_datapipe(
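
ImageNetLabel extends the prototype Label with a second piece of metadata, the WordNet id, so it can replace the old (label, category, wnid) tuple. A hedged sketch of constructing and reading one (the values are made up; attribute access on the metadata is assumed to work as for category above):

label = ImageNetLabel(7, category="tench", wnid="n01440764")

print(int(label))      # 7
print(label.category)  # "tench"
print(label.wnid)      # "n01440764"
# The test split has no annotations, so _collate_test_data now returns None in place of a label.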

torchvision/prototype/datasets/decoder.py

Lines changed: 4 additions & 1 deletion
@@ -13,4 +13,7 @@ def raw(buffer: io.IOBase) -> torch.Tensor:


 def pil(buffer: io.IOBase) -> features.Image:
-    return features.Image(pil_to_tensor(PIL.Image.open(buffer)))
+    try:
+        return features.Image(pil_to_tensor(PIL.Image.open(buffer)))
+    finally:
+        buffer.close()
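
Closing the buffer in a finally block releases the underlying file handle even if decoding raises. The same shape works for any decoder that fully consumes a file-like object (a generic sketch, not part of the commit):

import io

def to_bytes(buffer: io.IOBase) -> bytes:
    # Read everything, then close the buffer no matter what happened --
    # the same try/finally shape as the pil() change above.
    try:
        return buffer.read()
    finally:
        buffer.close()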
torchvision/prototype/features/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from ._bounding_box import BoundingBoxFormat, BoundingBox
-from ._feature import Feature
+from ._feature import Feature, DEFAULT
 from ._image import Image, ColorSpace
 from ._label import Label

torchvision/prototype/features/_bounding_box.py

Lines changed: 4 additions & 1 deletion
@@ -115,7 +115,10 @@ def guess_image_size(cls, data: torch.Tensor, *, format: BoundingBoxFormat) -> T
         data = cls._TO_XYXY_MAP[format](data)
         data = cls._FROM_XYXY_MAP[BoundingBoxFormat.XYWH](data)
         *_, w, h = to_parts(data)
-        return int(h.ceil()), int(w.ceil())
+        if data.dtype.is_floating_point:
+            w = w.ceil()
+            h = h.ceil()
+        return int(h), int(w)

     @classmethod
     def from_parts(
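
The dtype guard is presumably there because integer coordinates need no rounding and .ceil() on an integer tensor raises in PyTorch. The check in isolation, as a small sketch outside the BoundingBox class:

import torch

def ceil_to_int(value: torch.Tensor) -> int:
    # Only floating-point values need rounding up; integer values pass through unchanged.
    if value.dtype.is_floating_point:
        value = value.ceil()
    return int(value)

print(ceil_to_int(torch.tensor(3.2)))  # 4
print(ceil_to_int(torch.tensor(5)))    # 5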
