diff --git a/torchvision/prototype/features/__init__.py b/torchvision/prototype/features/__init__.py
index dd9982a04db..4da2c63f123 100644
--- a/torchvision/prototype/features/__init__.py
+++ b/torchvision/prototype/features/__init__.py
@@ -4,3 +4,5 @@
 from ._image import ColorSpace, Image
 from ._label import Label, OneHotLabel
 from ._segmentation_mask import SegmentationMask
+
+# We are putting lots of effort into Video this half. We will need to figure out video tensors in this prototype as well.
diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py
index 6c5dac72d53..5ff83122ada 100644
--- a/torchvision/prototype/features/_bounding_box.py
+++ b/torchvision/prototype/features/_bounding_box.py
@@ -15,7 +15,7 @@ class BoundingBoxFormat(StrEnum):
 
 
 class BoundingBox(Feature):
-    formats = BoundingBoxFormat
+    formats = BoundingBoxFormat  # Couldn't find a use of this in the code. Is there a reason why we don't just let people access the enum directly?
     format: BoundingBoxFormat
     image_size: Tuple[int, int]
 
@@ -40,6 +40,9 @@ def __new__(
     def to_format(self, format: Union[str, BoundingBoxFormat]) -> "BoundingBox":
         # import at runtime to avoid cyclic imports
         from torchvision.prototype.transforms.functional import convert_bounding_box_format
+        # I think we can avoid this by not having a `to_format` method and instead requiring users to explicitly call
+        # the convert method. As far as I can see, this method is used only once in the code, so it is something we
+        # could avoid altogether.
 
         if isinstance(format, str):
             format = BoundingBoxFormat[format]
diff --git a/torchvision/prototype/features/_encoded.py b/torchvision/prototype/features/_encoded.py
index 338b2d2230d..453659cb8ce 100644
--- a/torchvision/prototype/features/_encoded.py
+++ b/torchvision/prototype/features/_encoded.py
@@ -41,6 +41,7 @@ def image_size(self) -> Tuple[int, int]:
 
     def decode(self) -> Image:
         # import at runtime to avoid cyclic imports
         from torchvision.prototype.transforms.functional import decode_image_with_pil
+        # Same comments as on BoundingBox.to_format.
 
         return Image(decode_image_with_pil(self))
diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py
index a07da277314..2976c414f3b 100644
--- a/torchvision/prototype/features/_image.py
+++ b/torchvision/prototype/features/_image.py
@@ -13,6 +13,7 @@
 
 class ColorSpace(StrEnum):
     # this is just for test purposes
+    # How about the transparency color spaces supported by ImageReadMode?
     _SENTINEL = -1
     OTHER = 0
     GRAYSCALE = 1
@@ -77,7 +78,9 @@ def guess_color_space(data: torch.Tensor) -> ColorSpace:
         return ColorSpace.OTHER
 
     def show(self) -> None:
+        # This is a nice-to-have, but not a necessary method this early in the prototype.
         to_pil_image(make_grid(self.view(-1, *self.shape[-3:]))).show()
 
     def draw_bounding_box(self, bounding_box: BoundingBox, **kwargs: Any) -> "Image":
+        # Same as above, noting that this is the only method that requires to_format().
         return Image.new_like(self, draw_bounding_boxes(self, bounding_box.to_format("xyxy").view(-1, 4), **kwargs))
diff --git a/torchvision/prototype/features/_label.py b/torchvision/prototype/features/_label.py
index 3ce1da647e7..ef0ddf0d6a6 100644
--- a/torchvision/prototype/features/_label.py
+++ b/torchvision/prototype/features/_label.py
@@ -15,7 +15,7 @@ def __new__(
         *,
         dtype: Optional[torch.dtype] = None,
         device: Optional[torch.device] = None,
-        like: Optional["Label"] = None,
+        like: Optional["Label"] = None,  # Since we are at Py3.7, perhaps we could do `from __future__ import annotations` now.
         categories: Optional[Sequence[str]] = None,
     ):
         label = super().__new__(cls, data, dtype=dtype, device=device)
@@ -26,7 +26,7 @@
 
     @classmethod
     def from_category(cls, category: str, *, categories: Sequence[str]):
-        categories = list(categories)
+        categories = list(categories)  # Why shallow-copy here? If this method is called in a loop, we risk creating many shallow copies.
         return cls(categories.index(category), categories=categories)
 
     def to_categories(self):
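
A note to make the to_format suggestion concrete. The sketch below shows how the single call site in Image.draw_bounding_box could invoke the functional converter directly instead of going through the method. It is only an illustration of the suggested direction; the old_format/new_format keyword names are assumed, not taken from this diff.

    # Hedged sketch, assuming convert_bounding_box_format(tensor, *, old_format, new_format).
    from torchvision.prototype.features import BoundingBox, BoundingBoxFormat, Image
    from torchvision.prototype.transforms.functional import convert_bounding_box_format
    from torchvision.utils import draw_bounding_boxes

    def draw_bounding_box(image: Image, bounding_box: BoundingBox, **kwargs) -> Image:
        # Explicit conversion at the call site replaces bounding_box.to_format("xyxy").
        xyxy = convert_bounding_box_format(
            bounding_box, old_format=bounding_box.format, new_format=BoundingBoxFormat.XYXY
        )
        return Image.new_like(image, draw_bounding_boxes(image, xyxy.view(-1, 4), **kwargs))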
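
On the Py3.7 annotations comment: a minimal, self-contained sketch of what PEP 563 buys here. With the __future__ import, a forward reference to the class being defined no longer needs quotes. The class and method below are illustrative stand-ins, not the prototype's actual code.

    from __future__ import annotations  # PEP 563: annotations are evaluated lazily

    from typing import Optional

    class Label:
        # Without the __future__ import this annotation would need quotes ("Label"),
        # because the name Label is not bound yet while the class body executes.
        def new_like(self, like: Optional[Label] = None) -> Label:
            return like if like is not None else self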
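
And on the shallow-copy question in Label.from_category: the sketch below illustrates the cost pattern the comment worries about and one way a caller could hoist the copy out of a loop. The helper is hypothetical, not part of this diff.

    from typing import List, Sequence

    def indices_from_categories(names: Sequence[str], *, categories: Sequence[str]) -> List[int]:
        # Copy once, outside the loop, instead of once per from_category-style call.
        categories = list(categories)
        return [categories.index(name) for name in names]

    print(indices_from_categories(["dog", "cat"], categories=["cat", "dog", "bird"]))  # [1, 0]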