Revamp prototype features and transforms #5407

Merged

19 commits merged into main from revamp-prototype-features-transforms on Feb 11, 2022.

Changes from all commits

Commits (19)
bfc8510  revamp prototype features (#5283)  (pmeier, Jan 26, 2022)
be67431  remove decoding from prototype datasets (#5287)  (pmeier, Jan 27, 2022)
0e7cc3a  Merge branch 'main' into revamp-prototype-features-transforms  (pmeier, Jan 27, 2022)
3e23333  Merge branch 'main' into revamp-prototype-features-transforms  (pmeier, Jan 27, 2022)
8079e44  fix pcam  (pmeier, Jan 27, 2022)
466b845  readd functional transforms API to prototype (#5295)  (pmeier, Jan 31, 2022)
fad04f4  Merge branch 'main' into revamp-prototype-features-transforms  (pmeier, Jan 31, 2022)
7cffef6  Merge branch 'revamp-prototype-features-transforms' of https://github…  (pmeier, Jan 31, 2022)
1d748ae  Merge branch 'main' into revamp-prototype-features-transforms  (pmeier, Feb 7, 2022)
adad1df  fix prototype features and functional transforms after review (#5377)  (pmeier, Feb 7, 2022)
e81f254  make mypy more strict on prototype features  (pmeier, Feb 9, 2022)
394fe6e  Merge branch 'main' into revamp-prototype-features-transforms  (pmeier, Feb 9, 2022)
ab11f61  make mypy more strict for prototype transforms  (pmeier, Feb 9, 2022)
6e2d30b  fix annotation  (pmeier, Feb 10, 2022)
c923d77  fix kernel tests  (pmeier, Feb 10, 2022)
ce289d9  add automatic feature type dispatch to functional transforms (#5323)  (pmeier, Feb 10, 2022)
f3b80ef  Merge branch 'main' into revamp-prototype-features-transforms  (pmeier, Feb 11, 2022)
f795349  cleanup features / transforms feature branch (#5406)  (pmeier, Feb 11, 2022)
cc16b56  remove or privatize functionality in features / datasets / transforms  (pmeier, Feb 11, 2022)
30 changes: 30 additions & 0 deletions  mypy.ini

@@ -6,6 +6,36 @@ pretty = True
 allow_redefinition = True
 warn_redundant_casts = True
 
+[mypy-torchvision.prototype.features.*]
+
+; untyped definitions and calls
+disallow_untyped_defs = True
+
+; None and Optional handling
+no_implicit_optional = True
+
+; warnings
+warn_unused_ignores = True
+warn_return_any = True
+
+; miscellaneous strictness flags
+allow_redefinition = True
+
+[mypy-torchvision.prototype.transforms.*]
+
+; untyped definitions and calls
+disallow_untyped_defs = True
+
+; None and Optional handling
+no_implicit_optional = True
+
+; warnings
+warn_unused_ignores = True
+warn_return_any = True
+
+; miscellaneous strictness flags
+allow_redefinition = True
+
 [mypy-torchvision.prototype.datasets.*]
 
 ; untyped definitions and calls
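The two new sections opt torchvision.prototype.features and torchvision.prototype.transforms into the same strictness the datasets package already uses. As a hypothetical illustration (not code from this PR), these are the kinds of definitions the stricter flags reject:

```python
# Hypothetical snippets, not from this PR, showing what the stricter flags catch.
from typing import Any


def to_bounding_box(data, image_size):  # disallow_untyped_defs: parameters and
    ...                                 # return type must be annotated


def get_height(meta: Any) -> int:
    # warn_return_any: an expression typed Any flows into a declared `int` return
    return meta["height"]
```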
90 changes: 47 additions & 43 deletions  test/builtin_dataset_mocks.py

@@ -432,50 +432,52 @@ def caltech256(info, root, config):
 
 @register_mock
 def imagenet(info, root, config):
-    wnids = tuple(info.extra.wnid_to_category.keys())
-    if config.split == "train":
-        images_root = root / "ILSVRC2012_img_train"
+    from scipy.io import savemat
+
+    categories = info.categories
+    wnids = [info.extra.category_to_wnid[category] for category in categories]
+    if config.split == "train":
         num_samples = len(wnids)
+        archive_name = "ILSVRC2012_img_train.tar"
 
+        files = []
         for wnid in wnids:
-            files = create_image_folder(
-                root=images_root,
+            create_image_folder(
+                root=root,
                 name=wnid,
                 file_name_fn=lambda image_idx: f"{wnid}_{image_idx:04d}.JPEG",
                 num_examples=1,
             )
-            make_tar(images_root, f"{wnid}.tar", files[0].parent)
+            files.append(make_tar(root, f"{wnid}.tar"))
     elif config.split == "val":
         num_samples = 3
-        files = create_image_folder(
-            root=root,
-            name="ILSVRC2012_img_val",
-            file_name_fn=lambda image_idx: f"ILSVRC2012_val_{image_idx + 1:08d}.JPEG",
-            num_examples=num_samples,
-        )
-        images_root = files[0].parent
-    else:  # config.split == "test"
-        images_root = root / "ILSVRC2012_img_test_v10102019"
+        archive_name = "ILSVRC2012_img_val.tar"
+        files = [create_image_file(root, f"ILSVRC2012_val_{idx + 1:08d}.JPEG") for idx in range(num_samples)]
 
-        num_samples = 3
+        devkit_root = root / "ILSVRC2012_devkit_t12"
+        data_root = devkit_root / "data"
+        data_root.mkdir(parents=True)
 
-        create_image_folder(
-            root=images_root,
-            name="test",
-            file_name_fn=lambda image_idx: f"ILSVRC2012_test_{image_idx + 1:08d}.JPEG",
-            num_examples=num_samples,
-        )
-    make_tar(root, f"{images_root.name}.tar", images_root)
+        with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
+            for label in torch.randint(0, len(wnids), (num_samples,)).tolist():
+                file.write(f"{label}\n")
+
+        num_children = 0
+        synsets = [
+            (idx, wnid, category, "", num_children, [], 0, 0)
+            for idx, (category, wnid) in enumerate(zip(categories, wnids), 1)
+        ]
+        num_children = 1
+        synsets.extend((0, "", "", "", num_children, [], 0, 0) for _ in range(5))
+        savemat(data_root / "meta.mat", dict(synsets=synsets))
+
+        make_tar(root, devkit_root.with_suffix(".tar.gz").name, compression="gz")
+    else:  # config.split == "test"
+        num_samples = 5
+        archive_name = "ILSVRC2012_img_test_v10102019.tar"
+        files = [create_image_file(root, f"ILSVRC2012_test_{idx + 1:08d}.JPEG") for idx in range(num_samples)]
 
-    devkit_root = root / "ILSVRC2012_devkit_t12"
-    devkit_root.mkdir()
-    data_root = devkit_root / "data"
-    data_root.mkdir()
-    with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
-        for label in torch.randint(0, len(wnids), (num_samples,)).tolist():
-            file.write(f"{label}\n")
-    make_tar(root, f"{devkit_root}.tar.gz", devkit_root, compression="gz")
+    make_tar(root, archive_name, *files)
 
     return num_samples
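The refactored mock now collects the per-split archive paths in `files` and bundles them with a single `make_tar(root, archive_name, *files)` call at the end, instead of special-casing each split. `make_tar` itself is a test utility that is not part of this diff; a plausible sketch of such a helper, assuming it returns the archive path and defaults to archiving the directory named after the archive stem when no files are passed:

```python
# Sketch only: the real helper lives in the test utilities and may differ.
import pathlib
import tarfile


def make_tar(root: pathlib.Path, name: str, *files: pathlib.Path, compression: str = "") -> pathlib.Path:
    archive = root / name
    mode = f"w:{compression}" if compression else "w"
    # Default to the directory matching the archive stem, e.g. root/n01440764 for "n01440764.tar".
    sources = files or (root / pathlib.Path(name).stem,)
    with tarfile.open(archive, mode) as fh:
        for source in sources:
            fh.add(source, arcname=source.name)
    return archive
```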
@@ -667,14 +669,15 @@ def sbd(info, root, config):
 
 @register_mock
 def semeion(info, root, config):
     num_samples = 3
+    num_categories = len(info.categories)
 
     images = torch.rand(num_samples, 256)
-    labels = one_hot(torch.randint(len(info.categories), size=(num_samples,)))
+    labels = one_hot(torch.randint(num_categories, size=(num_samples,)), num_classes=num_categories)
     with open(root / "semeion.data", "w") as fh:
         for image, one_hot_label in zip(images, labels):
             image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image])
             labels_columns = " ".join([str(label.item()) for label in one_hot_label])
-            fh.write(f"{image_columns} {labels_columns}\n")
+            fh.write(f"{image_columns} {labels_columns} \n")
 
     return num_samples
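The explicit `num_classes=num_categories` matters here: by default the width of the encoding is inferred from the largest label that happens to be sampled, so a small random draw can yield fewer columns than the dataset has categories. A standalone sketch, assuming `one_hot` is `torch.nn.functional.one_hot`:

```python
import torch
from torch.nn.functional import one_hot

labels = torch.tensor([0, 2, 1])
print(one_hot(labels).shape)                  # torch.Size([3, 3]), width inferred from max label
print(one_hot(labels, num_classes=10).shape)  # torch.Size([3, 10]), width fixed to the category count
```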
@@ -729,32 +732,33 @@ def _make_detection_anns_folder(cls, root, name, *, file_name_fn, num_examples):
     def _make_detection_ann_file(cls, root, name):
         def add_child(parent, name, text=None):
             child = ET.SubElement(parent, name)
-            child.text = text
+            child.text = str(text)
             return child
 
         def add_name(obj, name="dog"):
             add_child(obj, "name", name)
-            return name
 
-        def add_bndbox(obj, bndbox=None):
-            if bndbox is None:
-                bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"}
+        def add_size(obj):
+            obj = add_child(obj, "size")
+            size = {"width": 0, "height": 0, "depth": 3}
+            for name, text in size.items():
+                add_child(obj, name, text)
 
+        def add_bndbox(obj):
             obj = add_child(obj, "bndbox")
+            bndbox = {"xmin": 1, "xmax": 2, "ymin": 3, "ymax": 4}
             for name, text in bndbox.items():
                 add_child(obj, name, text)
 
-            return bndbox
-
         annotation = ET.Element("annotation")
+        add_size(annotation)
         obj = add_child(annotation, "object")
-        data = dict(name=add_name(obj), bndbox=add_bndbox(obj))
+        add_name(obj)
+        add_bndbox(obj)
 
         with open(root / name, "wb") as fh:
             fh.write(ET.tostring(annotation))
 
-        return data
-
     @classmethod
     def generate(cls, root, *, year, trainval):
         archive_folder = root
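The change from `child.text = text` to `child.text = str(text)` goes together with the size and bndbox values becoming ints: ElementTree only serializes string text. A minimal demonstration, not from the PR:

```python
import xml.etree.ElementTree as ET

element = ET.Element("xmin")
element.text = 1  # ElementTree does not coerce non-string text
try:
    ET.tostring(element)
except TypeError as error:
    print(error)  # cannot serialize 1 (type int)

element.text = str(1)
print(ET.tostring(element))  # b'<xmin>1</xmin>'
```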
21 changes: 20 additions & 1 deletion  test/test_prototype_builtin_datasets.py

@@ -1,16 +1,23 @@
+import functools
 import io
 from pathlib import Path
 
 import pytest
 import torch
 from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS
+from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair
 from torch.utils.data.datapipes.iter.grouping import ShardingFilterIterDataPipe as ShardingFilter
 from torch.utils.data.graph import traverse
 from torchdata.datapipes.iter import IterDataPipe, Shuffler
 from torchvision.prototype import transforms, datasets
 from torchvision.prototype.utils._internal import sequence_to_str
 
 
+assert_samples_equal = functools.partial(
+    assert_equal, pair_types=(TensorLikePair, ObjectPair), rtol=0, atol=0, equal_nan=True
+)
+
+
 @pytest.fixture
 def test_home(mocker, tmp_path):
     mocker.patch("torchvision.prototype.datasets._api.home", return_value=str(tmp_path))
@@ -92,6 +99,7 @@ def test_no_vanilla_tensors(self, test_home, dataset_mock, config):
             f"{sequence_to_str(sorted(vanilla_tensors), separate_last='and ')} contained vanilla tensors."
         )
 
+    @pytest.mark.xfail
     @parametrize_dataset_mocks(DATASET_MOCKS)
     def test_transformable(self, test_home, dataset_mock, config):
         dataset_mock.prepare(test_home, config)
@@ -137,6 +145,17 @@ def scan(graph):
         if not any(type(dp) is annotation_dp_type for dp in scan(traverse(dataset))):
             raise AssertionError(f"The dataset doesn't contain a {annotation_dp_type.__name__}() datapipe.")
 
+    @parametrize_dataset_mocks(DATASET_MOCKS)
+    def test_save_load(self, test_home, dataset_mock, config):
+        dataset_mock.prepare(test_home, config)
+        dataset = datasets.load(dataset_mock.name, **config)
+        sample = next(iter(dataset))
+
+        with io.BytesIO() as buffer:
+            torch.save(sample, buffer)
+            buffer.seek(0)
+            assert_samples_equal(torch.load(buffer), sample)
+
 
 @parametrize_dataset_mocks(DATASET_MOCKS["qmnist"])
 class TestQMNIST:
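The new `test_save_load` checks that every dataset sample survives a `torch.save`/`torch.load` round trip. A standalone sketch of the same pattern (the sample contents here are made up):

```python
import io

import torch

sample = {"image": torch.rand(3, 16, 16), "label": torch.tensor(7), "path": "n01440764/x.JPEG"}

with io.BytesIO() as buffer:
    torch.save(sample, buffer)
    buffer.seek(0)  # rewind so torch.load reads from the beginning
    restored = torch.load(buffer)

assert restored["path"] == sample["path"]
assert torch.equal(restored["image"], sample["image"])
```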
@@ -171,5 +190,5 @@ def test_label_matches_path(self, test_home, dataset_mock, config):
         dataset = datasets.load(dataset_mock.name, **config)
 
         for sample in dataset:
-            label_from_path = int(Path(sample["image_path"]).parent.name)
+            label_from_path = int(Path(sample["path"]).parent.name)
             assert sample["label"] == label_from_path
15 changes: 3 additions & 12 deletions  test/test_prototype_datasets_api.py

@@ -5,8 +5,8 @@
 from torchvision.prototype.utils._internal import FrozenMapping, FrozenBunch
 
 
-def make_minimal_dataset_info(name="name", type=datasets.utils.DatasetType.RAW, categories=None, **kwargs):
-    return datasets.utils.DatasetInfo(name, type=type, categories=categories or [], **kwargs)
+def make_minimal_dataset_info(name="name", categories=None, **kwargs):
+    return datasets.utils.DatasetInfo(name, categories=categories or [], **kwargs)
 
 
 class TestFrozenMapping:
@@ -176,7 +176,7 @@ def resources(self, config):
         # This method is just defined to appease the ABC, but will be overwritten at instantiation
         pass
 
-    def _make_datapipe(self, resource_dps, *, config, decoder):
+    def _make_datapipe(self, resource_dps, *, config):
         # This method is just defined to appease the ABC, but will be overwritten at instantiation
         pass
 
@@ -229,12 +229,3 @@ def test_resources(self, mocker):
 
         (call_args, _) = dataset._make_datapipe.call_args
         assert call_args[0][0] is sentinel
-
-    def test_decoder(self):
-        dataset = self.DatasetMock()
-
-        sentinel = object()
-        dataset.load("", decoder=sentinel)
-
-        (_, call_kwargs) = dataset._make_datapipe.call_args
-        assert call_kwargs["decoder"] is sentinel
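With the `decoder` keyword removed from `_make_datapipe`, `test_decoder` is obsolete; the retained `test_resources` still inspects the mocked `_make_datapipe` through its `call_args`. A generic `unittest.mock` illustration of that unpacking (not code from the PR):

```python
from unittest import mock

make_datapipe = mock.Mock()
sentinel = object()
make_datapipe([sentinel], config=None)

# mock.call_args unpacks into (positional args, keyword args)
(call_args, call_kwargs) = make_datapipe.call_args
assert call_args[0][0] is sentinel
assert call_kwargs == {"config": None}
```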