From a63d6f278b6a34e66921eb140cf5719b15f8fc25 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 23 Nov 2021 17:24:07 +0100 Subject: [PATCH 01/16] [DEBUG] run prototype tests on windows --- .circleci/config.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6a5a06f10ef..8687d578957 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -261,8 +261,9 @@ jobs: python test/test_onnx.py prototype_test: - docker: - - image: circleci/python:3.7 + executor: windows-cpu +# docker: +# - image: circleci/python:3.7 steps: - run: name: Install torch From 5d4a8c17ade9a7c1bacaeb21e53571abf738cae5 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 23 Nov 2021 17:34:27 +0100 Subject: [PATCH 02/16] add scripts to path --- .circleci/config.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8687d578957..e169ba00603 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -280,7 +280,9 @@ jobs: command: pip install --user --progress-bar=off pytest pytest-mock scipy iopath - run: name: Run tests - command: pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py + command: | + export PATH="C:\Users\circleci\AppData\Roaming\Python\Python37\Scripts":$PATH + pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py - store_test_results: path: test-results From 72cabe83c06f08d1a5e00fddabccd1f1ab6fbdbe Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 24 Nov 2021 08:37:54 +0100 Subject: [PATCH 03/16] more debug --- .../prototype/datasets/utils/_internal.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 3db10183f68..eb429bc5e72 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -301,12 +301,23 @@ def fromfile( # a mutable location afterwards. buffer: Union[memoryview, bytearray] try: - buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] + # FIXME: DEBUG + a = file.fileno() + b = mmap.mmap(a, 0) + c = memoryview(b) + d = file.tell() + e = c[d:] + buffer = e + # buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] # Reading from the memoryview does not advance the file cursor, so we have to do it manually. - file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR)) + offset, whence = (0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR) + file.seek(offset, whence) except (PermissionError, io.UnsupportedOperation): # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable - buffer = bytearray(file.read(-1 if count == -1 else count * item_size)) + f = file.read(-1 if count == -1 else count * item_size) + g = bytearray(f) + buffer = g + # buffer = bytearray(file.read(-1 if count == -1 else count * item_size)) # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we # read the data with np.frombuffer() with the correct byte order and convert it to the native one with the From 57033e3cffa6bc9ba8ffdbaf3bf50c3b1eba8eb2 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 24 Nov 2021 13:47:24 +0100 Subject: [PATCH 04/16] [DEBUG] remove mmap --- .../prototype/datasets/utils/_internal.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index eb429bc5e72..11769c6e716 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -299,25 +299,25 @@ def fromfile( # - the file is seekable # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it to # a mutable location afterwards. - buffer: Union[memoryview, bytearray] - try: - # FIXME: DEBUG - a = file.fileno() - b = mmap.mmap(a, 0) - c = memoryview(b) - d = file.tell() - e = c[d:] - buffer = e - # buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] - # Reading from the memoryview does not advance the file cursor, so we have to do it manually. - offset, whence = (0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR) - file.seek(offset, whence) - except (PermissionError, io.UnsupportedOperation): - # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable - f = file.read(-1 if count == -1 else count * item_size) - g = bytearray(f) - buffer = g - # buffer = bytearray(file.read(-1 if count == -1 else count * item_size)) + # buffer: Union[memoryview, bytearray] + # try: + # # FIXME: DEBUG + # a = file.fileno() + # b = mmap.mmap(a, 0) + # c = memoryview(b) + # d = file.tell() + # e = c[d:] + # buffer = e + # # buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] + # # Reading from the memoryview does not advance the file cursor, so we have to do it manually. + # offset, whence = (0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR) + # file.seek(offset, whence) + # except (PermissionError, io.UnsupportedOperation): + # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable + f = file.read(-1 if count == -1 else count * item_size) + g = bytearray(f) + buffer = g + # buffer = bytearray(file.read(-1 if count == -1 else count * item_size)) # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we # read the data with np.frombuffer() with the correct byte order and convert it to the native one with the From 2d6eea39f37d2f7aa0b7255605b43ca715b258ec Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 24 Nov 2021 14:00:59 +0100 Subject: [PATCH 05/16] test fileno --- torchvision/prototype/datasets/utils/_internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 11769c6e716..104d0aec5a8 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -302,7 +302,7 @@ def fromfile( # buffer: Union[memoryview, bytearray] # try: # # FIXME: DEBUG - # a = file.fileno() + a = file.fileno() # b = mmap.mmap(a, 0) # c = memoryview(b) # d = file.tell() From 652750484375830bb4c8d1651d39ced4df25626b Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 25 Nov 2021 11:56:32 +0100 Subject: [PATCH 06/16] ignore fileno error --- torchvision/prototype/datasets/utils/_internal.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 104d0aec5a8..51275e973c7 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -302,7 +302,10 @@ def fromfile( # buffer: Union[memoryview, bytearray] # try: # # FIXME: DEBUG - a = file.fileno() + try: + a = file.fileno() + except io.UnsupportedOperation: + pass # b = mmap.mmap(a, 0) # c = memoryview(b) # d = file.tell() From 7d798de601762870fd8dfbbdc4dddafd94b3eb13 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 26 Nov 2021 08:44:32 +0100 Subject: [PATCH 07/16] test fileno and mmap --- torchvision/prototype/datasets/utils/_internal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 51275e973c7..3894b6831f8 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -304,6 +304,10 @@ def fromfile( # # FIXME: DEBUG try: a = file.fileno() + try: + b = mmap.mmap(a, 0) + except PermissionError: + pass except io.UnsupportedOperation: pass # b = mmap.mmap(a, 0) From d50ee1b45fdb4bc68def0b11bce367aaeedee859 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 2 Dec 2021 11:21:21 +0100 Subject: [PATCH 08/16] add debug prints --- torchvision/prototype/datasets/utils/_internal.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 3894b6831f8..eb052be0454 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -304,12 +304,14 @@ def fromfile( # # FIXME: DEBUG try: a = file.fileno() + print("fileno worked") try: b = mmap.mmap(a, 0) + print("mmap worked") except PermissionError: - pass + print("mmap failed") except io.UnsupportedOperation: - pass + print("fileno failed") # b = mmap.mmap(a, 0) # c = memoryview(b) # d = file.tell() From ad39bfe9b847359330a1e2a7af15e38d486cbcde Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 2 Dec 2021 11:29:10 +0100 Subject: [PATCH 09/16] debug --- torchvision/prototype/datasets/utils/_internal.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index eb052be0454..eb537aa535d 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -303,15 +303,10 @@ def fromfile( # try: # # FIXME: DEBUG try: - a = file.fileno() - print("fileno worked") - try: - b = mmap.mmap(a, 0) - print("mmap worked") - except PermissionError: - print("mmap failed") - except io.UnsupportedOperation: - print("fileno failed") + mmap.mmap(file.fileno(), 0) + print("mmap worked") + except PermissionError as error: + print(f"mmap failed with {error}") # b = mmap.mmap(a, 0) # c = memoryview(b) # d = file.tell() From dd4d92c855703b6959af090442b4f6f716cad57e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 2 Dec 2021 11:35:59 +0100 Subject: [PATCH 10/16] debug --- torchvision/prototype/datasets/utils/_internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index eb537aa535d..5308ea85c64 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -306,7 +306,7 @@ def fromfile( mmap.mmap(file.fileno(), 0) print("mmap worked") except PermissionError as error: - print(f"mmap failed with {error}") + print(f"mmap failed with {type(error)}: {error}") # b = mmap.mmap(a, 0) # c = memoryview(b) # d = file.tell() From 20c0d32b21411940d123e9ad548229b1d49fcfbf Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 2 Dec 2021 15:54:44 +0100 Subject: [PATCH 11/16] more debug --- torchvision/prototype/datasets/_builtin/mnist.py | 2 ++ torchvision/prototype/datasets/utils/_internal.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/mnist.py b/torchvision/prototype/datasets/_builtin/mnist.py index c242207b7d7..68ef075f0ce 100644 --- a/torchvision/prototype/datasets/_builtin/mnist.py +++ b/torchvision/prototype/datasets/_builtin/mnist.py @@ -75,6 +75,8 @@ def __iter__(self) -> Iterator[torch.Tensor]: file.seek(num_bytes_per_value * count * start, 1) for _ in range(stop - start): + print("#" * 80) + print(f"{shape}: {_}") yield read(dtype=dtype, count=count).reshape(shape) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 5308ea85c64..f38d033a4b5 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -302,11 +302,13 @@ def fromfile( # buffer: Union[memoryview, bytearray] # try: # # FIXME: DEBUG + + # https://bugs.python.org/issue12562 try: - mmap.mmap(file.fileno(), 0) + mmap.mmap(file.fileno(), tagname=str(float(torch.rand(()))), length=0) print("mmap worked") except PermissionError as error: - print(f"mmap failed with {type(error)}: {error}") + print(f"mmap failed with {error}") # b = mmap.mmap(a, 0) # c = memoryview(b) # d = file.tell() From 08125a063a2bbfdad52a9c2cde4598181c681de4 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 3 Dec 2021 15:31:49 +0100 Subject: [PATCH 12/16] exclude windows from mmap --- .../prototype/datasets/utils/_internal.py | 54 ++++++++----------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index f38d033a4b5..c49bf46c435 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -8,6 +8,7 @@ import os.path import pathlib import pickle +import platform from typing import BinaryIO from typing import ( Sequence, @@ -260,6 +261,11 @@ def _make_sharded_datapipe(root: str, dataset_size: int) -> IterDataPipe: return dp +def _read_mutable_buffer_fallback(file, count, item_size): + # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable + return bytearray(file.read(-1 if count == -1 else count * item_size)) + + def fromfile( file: BinaryIO, *, @@ -293,37 +299,23 @@ def fromfile( item_size = (torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits // 8 np_dtype = byte_order + char + str(item_size) - # PyTorch does not support tensors with underlying read-only memory. In case - # - the file has a .fileno(), - # - the file was opened for updating, i.e. 'r+b' or 'w+b', - # - the file is seekable - # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it to - # a mutable location afterwards. - # buffer: Union[memoryview, bytearray] - # try: - # # FIXME: DEBUG - - # https://bugs.python.org/issue12562 - try: - mmap.mmap(file.fileno(), tagname=str(float(torch.rand(()))), length=0) - print("mmap worked") - except PermissionError as error: - print(f"mmap failed with {error}") - # b = mmap.mmap(a, 0) - # c = memoryview(b) - # d = file.tell() - # e = c[d:] - # buffer = e - # # buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] - # # Reading from the memoryview does not advance the file cursor, so we have to do it manually. - # offset, whence = (0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR) - # file.seek(offset, whence) - # except (PermissionError, io.UnsupportedOperation): - # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable - f = file.read(-1 if count == -1 else count * item_size) - g = bytearray(f) - buffer = g - # buffer = bytearray(file.read(-1 if count == -1 else count * item_size)) + buffer: Union[memoryview, bytearray] + if platform.system() != "Windows": + # PyTorch does not support tensors with underlying read-only memory. In case + # - the file has a .fileno(), + # - the file was opened for updating, i.e. 'r+b' or 'w+b', + # - the file is seekable + # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it to + # a mutable location afterwards. + try: + buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] + # Reading from the memoryview does not advance the file cursor, so we have to do it manually. + file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR)) + except (PermissionError, io.UnsupportedOperation): + buffer = _read_mutable_buffer_fallback(file, count, item_size) + else: + # + buffer = _read_mutable_buffer_fallback(file, count, item_size) # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we # read the data with np.frombuffer() with the correct byte order and convert it to the native one with the From f52d9c0b0542a9be5e6fbf0fcc376e85caf8d2a0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 3 Dec 2021 15:43:21 +0100 Subject: [PATCH 13/16] add comment for windows --- torchvision/prototype/datasets/utils/_internal.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index c49bf46c435..e4d8fb182f9 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -305,8 +305,8 @@ def fromfile( # - the file has a .fileno(), # - the file was opened for updating, i.e. 'r+b' or 'w+b', # - the file is seekable - # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it to - # a mutable location afterwards. + # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it + # to a mutable location afterwards. try: buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] # Reading from the memoryview does not advance the file cursor, so we have to do it manually. @@ -314,7 +314,8 @@ def fromfile( except (PermissionError, io.UnsupportedOperation): buffer = _read_mutable_buffer_fallback(file, count, item_size) else: - # + # On Windows just trying to call mmap.mmap() on a file that does not support it, may corrupt the internal state + # so no data can be read afterwards. Thus, we simply ignore the possible speed-up. buffer = _read_mutable_buffer_fallback(file, count, item_size) # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we From 48d650c010a7590c3015b1c777c826ce229eb888 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 3 Dec 2021 15:43:43 +0100 Subject: [PATCH 14/16] revert CI changes --- .circleci/config.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e169ba00603..6a5a06f10ef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -261,9 +261,8 @@ jobs: python test/test_onnx.py prototype_test: - executor: windows-cpu -# docker: -# - image: circleci/python:3.7 + docker: + - image: circleci/python:3.7 steps: - run: name: Install torch @@ -280,9 +279,7 @@ jobs: command: pip install --user --progress-bar=off pytest pytest-mock scipy iopath - run: name: Run tests - command: | - export PATH="C:\Users\circleci\AppData\Roaming\Python\Python37\Scripts":$PATH - pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py + command: pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py - store_test_results: path: test-results From 2bfa35fb86c52ffbe84955bfa0058ddec5654895 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 3 Dec 2021 16:14:15 +0100 Subject: [PATCH 15/16] remove debug code --- torchvision/prototype/datasets/_builtin/mnist.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/mnist.py b/torchvision/prototype/datasets/_builtin/mnist.py index 68ef075f0ce..c242207b7d7 100644 --- a/torchvision/prototype/datasets/_builtin/mnist.py +++ b/torchvision/prototype/datasets/_builtin/mnist.py @@ -75,8 +75,6 @@ def __iter__(self) -> Iterator[torch.Tensor]: file.seek(num_bytes_per_value * count * start, 1) for _ in range(stop - start): - print("#" * 80) - print(f"{shape}: {_}") yield read(dtype=dtype, count=count).reshape(shape) From 4560946037c0b5f1a8ac01208baf833ed1459c88 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 3 Dec 2021 16:16:06 +0100 Subject: [PATCH 16/16] appease mypy --- torchvision/prototype/datasets/utils/_internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index e4d8fb182f9..c4b91b4a14b 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -261,7 +261,7 @@ def _make_sharded_datapipe(root: str, dataset_size: int) -> IterDataPipe: return dp -def _read_mutable_buffer_fallback(file, count, item_size): +def _read_mutable_buffer_fallback(file: BinaryIO, count: int, item_size: int) -> bytearray: # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable return bytearray(file.read(-1 if count == -1 else count * item_size))