From 1106c57536796c8bf6ca0a25b0696469d231f395 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 31 Mar 2022 08:58:26 +0200 Subject: [PATCH 01/12] reenable serialization test --- .circleci/config.yml | 7 +++---- .circleci/config.yml.in | 7 +++---- test/test_prototype_builtin_datasets.py | 18 +++++++++++++----- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e50e3328848..db412ff7d62 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -152,11 +152,10 @@ commands: args: --no-build-isolation <<# parameters.editable >> --editable <> . descr: Install torchvision <<# parameters.editable >> in editable mode <> + # Installs all extra dependencies that are needed in the torchvision.prototype namespace, but are not tracked in the + # project requirements. install_prototype_dependencies: steps: - - pip_install: - args: iopath - descr: Install third-party dependencies - pip_install: args: --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu descr: Install torchdata from nightly releases @@ -366,7 +365,7 @@ jobs: - install_torchvision - install_prototype_dependencies - pip_install: - args: scipy pycocotools h5py + args: scipy pycocotools h5py dill descr: Install optional dependencies - run_tests_selective: file_or_dir: test/test_prototype_*.py diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 5605f953fef..a0a3d2f9849 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -152,11 +152,10 @@ commands: args: --no-build-isolation <<# parameters.editable >> --editable <> . descr: Install torchvision <<# parameters.editable >> in editable mode <> + # Installs all extra dependencies that are needed in the torchvision.prototype namespace, but are not tracked in the + # project requirements. install_prototype_dependencies: steps: - - pip_install: - args: iopath - descr: Install third-party dependencies - pip_install: args: --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu descr: Install torchdata from nightly releases @@ -366,7 +365,7 @@ jobs: - install_torchvision - install_prototype_dependencies - pip_install: - args: scipy pycocotools h5py + args: scipy pycocotools h5py dill descr: Install optional dependencies - run_tests_selective: file_or_dir: test/test_prototype_*.py diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index f8dc3a0542b..8764c5e5d77 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -7,6 +7,7 @@ import torch from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair +from torch.utils.data._utils.serialization import DILL_AVAILABLE from torch.utils.data.graph import traverse from torch.utils.data.graph_settings import get_all_graph_pipes from torchdata.datapipes.iter import Shuffler, ShardingFilter @@ -109,23 +110,30 @@ def test_transformable(self, test_home, dataset_mock, config): next(iter(dataset.map(transforms.Identity()))) - @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237") @parametrize_dataset_mocks(DATASET_MOCKS) - def test_serializable(self, test_home, dataset_mock, config): + def test_serializable_pickle(self, test_home, dataset_mock, config): dataset_mock.prepare(test_home, config) - dataset = datasets.load(dataset_mock.name, **config) pickle.dumps(dataset) + @pytest.mark.skipif(not DILL_AVAILABLE, reason="Package `dill` is not available.") + # TODO: remove this as soon as dill is fully supported + @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237") + def test_serializable_dill(self, test_home, dataset_mock, config): + import dill + + dataset_mock.prepare(test_home, config) + dataset = datasets.load(dataset_mock.name, **config) + + dill.dumps(dataset) + # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680 # contain a custom test for that, but we opted to wait for a potential solution / test from torchdata for now. - @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237") @parametrize_dataset_mocks(DATASET_MOCKS) @pytest.mark.parametrize("annotation_dp_type", (Shuffler, ShardingFilter)) def test_has_annotations(self, test_home, dataset_mock, config, annotation_dp_type): - dataset_mock.prepare(test_home, config) dataset = datasets.load(dataset_mock.name, **config) From 84cde3a93c7edc6ae69751bd11e33aad7e92983e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 31 Mar 2022 09:07:13 +0200 Subject: [PATCH 02/12] cleanup --- test/test_prototype_builtin_datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index 8764c5e5d77..0d08dbeba24 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -134,6 +134,7 @@ def test_serializable_dill(self, test_home, dataset_mock, config): @parametrize_dataset_mocks(DATASET_MOCKS) @pytest.mark.parametrize("annotation_dp_type", (Shuffler, ShardingFilter)) def test_has_annotations(self, test_home, dataset_mock, config, annotation_dp_type): + dataset_mock.prepare(test_home, config) dataset = datasets.load(dataset_mock.name, **config) From 0b77ea82233bc0299b82eee0dfee1bae51abd118 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 31 Mar 2022 09:14:44 +0200 Subject: [PATCH 03/12] fix dill test --- test/test_prototype_builtin_datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index 0d08dbeba24..bed1875cf6a 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -119,7 +119,8 @@ def test_serializable_pickle(self, test_home, dataset_mock, config): @pytest.mark.skipif(not DILL_AVAILABLE, reason="Package `dill` is not available.") # TODO: remove this as soon as dill is fully supported - @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237") + @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237", raises=RecursionError) + @parametrize_dataset_mocks(DATASET_MOCKS) def test_serializable_dill(self, test_home, dataset_mock, config): import dill From f382ba87dcce2562d007723d31798fc7648624fa Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 31 Mar 2022 13:48:58 +0200 Subject: [PATCH 04/12] trigger CI From b3a630fbb11ba71956b6e94b9de8c222b7bd21a2 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 31 Mar 2022 16:26:59 +0200 Subject: [PATCH 05/12] patch DILL_AVAILABLE for pickle serialization --- test/test_prototype_builtin_datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index bed1875cf6a..7d5b92f301f 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -111,7 +111,10 @@ def test_transformable(self, test_home, dataset_mock, config): next(iter(dataset.map(transforms.Identity()))) @parametrize_dataset_mocks(DATASET_MOCKS) - def test_serializable_pickle(self, test_home, dataset_mock, config): + def test_serializable_pickle(self, mocker, test_home, dataset_mock, config): + if DILL_AVAILABLE: + mocker.patch("torch.utils.data.datapipes.datapipe.DILL_AVAILABLE", new=False) + dataset_mock.prepare(test_home, config) dataset = datasets.load(dataset_mock.name, **config) From e1aaf7afe1a5a3bb48ed5d4d41ac1acaf5927968 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 1 Apr 2022 19:43:57 +0200 Subject: [PATCH 06/12] revert CI changes --- .circleci/config.yml | 7 ++++--- .circleci/config.yml.in | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index db412ff7d62..e50e3328848 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -152,10 +152,11 @@ commands: args: --no-build-isolation <<# parameters.editable >> --editable <> . descr: Install torchvision <<# parameters.editable >> in editable mode <> - # Installs all extra dependencies that are needed in the torchvision.prototype namespace, but are not tracked in the - # project requirements. install_prototype_dependencies: steps: + - pip_install: + args: iopath + descr: Install third-party dependencies - pip_install: args: --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu descr: Install torchdata from nightly releases @@ -365,7 +366,7 @@ jobs: - install_torchvision - install_prototype_dependencies - pip_install: - args: scipy pycocotools h5py dill + args: scipy pycocotools h5py descr: Install optional dependencies - run_tests_selective: file_or_dir: test/test_prototype_*.py diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index a0a3d2f9849..5605f953fef 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -152,10 +152,11 @@ commands: args: --no-build-isolation <<# parameters.editable >> --editable <> . descr: Install torchvision <<# parameters.editable >> in editable mode <> - # Installs all extra dependencies that are needed in the torchvision.prototype namespace, but are not tracked in the - # project requirements. install_prototype_dependencies: steps: + - pip_install: + args: iopath + descr: Install third-party dependencies - pip_install: args: --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu descr: Install torchdata from nightly releases @@ -365,7 +366,7 @@ jobs: - install_torchvision - install_prototype_dependencies - pip_install: - args: scipy pycocotools h5py dill + args: scipy pycocotools h5py descr: Install optional dependencies - run_tests_selective: file_or_dir: test/test_prototype_*.py From d8aeb6d725c4cd4427287f93d7977176e58cfa4b Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 1 Apr 2022 19:50:11 +0200 Subject: [PATCH 07/12] remove dill test and traversable test --- test/test_prototype_builtin_datasets.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index 7d5b92f301f..c1d053a311c 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -7,7 +7,6 @@ import torch from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair -from torch.utils.data._utils.serialization import DILL_AVAILABLE from torch.utils.data.graph import traverse from torch.utils.data.graph_settings import get_all_graph_pipes from torchdata.datapipes.iter import Shuffler, ShardingFilter @@ -111,26 +110,18 @@ def test_transformable(self, test_home, dataset_mock, config): next(iter(dataset.map(transforms.Identity()))) @parametrize_dataset_mocks(DATASET_MOCKS) - def test_serializable_pickle(self, mocker, test_home, dataset_mock, config): - if DILL_AVAILABLE: - mocker.patch("torch.utils.data.datapipes.datapipe.DILL_AVAILABLE", new=False) - + def test_traversable(self, test_home, dataset_mock, config): dataset_mock.prepare(test_home, config) dataset = datasets.load(dataset_mock.name, **config) - pickle.dumps(dataset) + traverse(dataset, only_datapipe=False) - @pytest.mark.skipif(not DILL_AVAILABLE, reason="Package `dill` is not available.") - # TODO: remove this as soon as dill is fully supported - @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237", raises=RecursionError) @parametrize_dataset_mocks(DATASET_MOCKS) - def test_serializable_dill(self, test_home, dataset_mock, config): - import dill - + def test_serializable(self, test_home, dataset_mock, config): dataset_mock.prepare(test_home, config) dataset = datasets.load(dataset_mock.name, **config) - dill.dumps(dataset) + pickle.dumps(dataset) # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680 From f9b682c313860b3a29259c3c079f36d95b87fe12 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 1 Apr 2022 20:01:32 +0200 Subject: [PATCH 08/12] add data loader test --- test/test_prototype_builtin_datasets.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index c1d053a311c..ff6e38ca86c 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -7,6 +7,7 @@ import torch from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair +from torch.utils.data import DataLoader from torch.utils.data.graph import traverse from torch.utils.data.graph_settings import get_all_graph_pipes from torchdata.datapipes.iter import Shuffler, ShardingFilter @@ -40,7 +41,7 @@ def test_coverage(): ) -@pytest.mark.filterwarnings("error") +# @pytest.mark.filterwarnings("error") class TestCommon: @pytest.mark.parametrize("name", datasets.list_datasets()) def test_info(self, name): @@ -123,6 +124,22 @@ def test_serializable(self, test_home, dataset_mock, config): pickle.dumps(dataset) + @pytest.mark.parametrize("num_workers", [0, 1]) + @parametrize_dataset_mocks(DATASET_MOCKS) + def test_data_loader(self, test_home, dataset_mock, config, num_workers): + dataset_mock.prepare(test_home, config) + dataset = datasets.load(dataset_mock.name, **config) + + dl = DataLoader( + dataset, + batch_size=2, + num_workers=num_workers, + collate_fn=lambda batch: batch, + ) + + for _ in dl: + pass + # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680 # contain a custom test for that, but we opted to wait for a potential solution / test from torchdata for now. From d237f2f58eed22990027b418e0b9ee165e57a72d Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 4 Apr 2022 19:19:46 +0200 Subject: [PATCH 09/12] parametrize over only_datapipe --- test/test_prototype_builtin_datasets.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index ff6e38ca86c..6891095136f 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -110,12 +110,13 @@ def test_transformable(self, test_home, dataset_mock, config): next(iter(dataset.map(transforms.Identity()))) + @pytest.mark.parametrize("only_datapipe", [False, True]) @parametrize_dataset_mocks(DATASET_MOCKS) - def test_traversable(self, test_home, dataset_mock, config): + def test_traversable(self, test_home, dataset_mock, config, only_datapipe): dataset_mock.prepare(test_home, config) dataset = datasets.load(dataset_mock.name, **config) - traverse(dataset, only_datapipe=False) + traverse(dataset, only_datapipe=only_datapipe) @parametrize_dataset_mocks(DATASET_MOCKS) def test_serializable(self, test_home, dataset_mock, config): From 5c3646d02e8972ced559acab8f167ed4fe955c8f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 4 Apr 2022 19:20:23 +0200 Subject: [PATCH 10/12] draw one sample rather than exhaust data loader --- test/test_prototype_builtin_datasets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index 6891095136f..d9b325734f8 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -138,8 +138,7 @@ def test_data_loader(self, test_home, dataset_mock, config, num_workers): collate_fn=lambda batch: batch, ) - for _ in dl: - pass + next(iter(dl)) # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680 From fcdae07438e8da8a8533aa8848bc1ff268530430 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 4 Apr 2022 19:21:02 +0200 Subject: [PATCH 11/12] cleanup --- test/test_prototype_builtin_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py index d9b325734f8..8a929b6907c 100644 --- a/test/test_prototype_builtin_datasets.py +++ b/test/test_prototype_builtin_datasets.py @@ -41,7 +41,7 @@ def test_coverage(): ) -# @pytest.mark.filterwarnings("error") +@pytest.mark.filterwarnings("error") class TestCommon: @pytest.mark.parametrize("name", datasets.list_datasets()) def test_info(self, name): From 50ff31e22ea13c6c5f5770093e1f621d68d751da Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 5 Apr 2022 11:18:01 +0200 Subject: [PATCH 12/12] trigger CI