fix fromfile on windows (#4980)

pmeier · web-flow · commit dcf5dc8747b9 · 2021-12-03T16:22:33.000+01:00
* exclude windows from mmap

* add comment for windows

* appease mypy
diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py
@@ -8,6 +8,7 @@
 import os.path
 import pathlib
 import pickle
+import platform
 from typing import BinaryIO
 from typing import (
     Sequence,
@@ -260,6 +261,11 @@ def _make_sharded_datapipe(root: str, dataset_size: int) -> IterDataPipe:
     return dp
 
 
+def _read_mutable_buffer_fallback(file: BinaryIO, count: int, item_size: int) -> bytearray:
+    # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable
+    return bytearray(file.read(-1 if count == -1 else count * item_size))
+
+
 def fromfile(
     file: BinaryIO,
     *,
@@ -293,20 +299,24 @@ def fromfile(
     item_size = (torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits // 8
     np_dtype = byte_order + char + str(item_size)
 
-    # PyTorch does not support tensors with underlying read-only memory. In case
-    # - the file has a .fileno(),
-    # - the file was opened for updating, i.e. 'r+b' or 'w+b',
-    # - the file is seekable
-    # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it to
-    # a mutable location afterwards.
     buffer: Union[memoryview, bytearray]
-    try:
-        buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :]
-        # Reading from the memoryview does not advance the file cursor, so we have to do it manually.
-        file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR))
-    except (PermissionError, io.UnsupportedOperation):
-        # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable
-        buffer = bytearray(file.read(-1 if count == -1 else count * item_size))
+    if platform.system() != "Windows":
+        # PyTorch does not support tensors with underlying read-only memory. In case
+        # - the file has a .fileno(),
+        # - the file was opened for updating, i.e. 'r+b' or 'w+b',
+        # - the file is seekable
+        # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it
+        # to a mutable location afterwards.
+        try:
+            buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :]
+            # Reading from the memoryview does not advance the file cursor, so we have to do it manually.
+            file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR))
+        except (PermissionError, io.UnsupportedOperation):
+            buffer = _read_mutable_buffer_fallback(file, count, item_size)
+    else:
+        # On Windows just trying to call mmap.mmap() on a file that does not support it, may corrupt the internal state
+        # so no data can be read afterwards. Thus, we simply ignore the possible speed-up.
+        buffer = _read_mutable_buffer_fallback(file, count, item_size)
 
     # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we
     # read the data with np.frombuffer() with the correct byte order and convert it to the native one with the