|
3 | 3 | import gzip
|
4 | 4 | import io
|
5 | 5 | import lzma
|
6 |
| -import mmap |
7 | 6 | import os
|
8 | 7 | import os.path
|
9 | 8 | import pathlib
|
10 | 9 | import pickle
|
11 |
| -import platform |
12 | 10 | from typing import BinaryIO
|
13 | 11 | from typing import (
|
14 | 12 | Sequence,
|
|
32 | 30 | import torch.utils.data
|
33 | 31 | from torchdata.datapipes.iter import IoPathFileLister, IoPathFileOpener, IterDataPipe, ShardingFilter, Shuffler
|
34 | 32 | from torchdata.datapipes.utils import StreamWrapper
|
| 33 | +from torchvision.prototype.utils._internal import fromfile |
35 | 34 |
|
36 | 35 |
|
37 | 36 | __all__ = [
|
|
46 | 45 | "path_accessor",
|
47 | 46 | "path_comparator",
|
48 | 47 | "Decompressor",
|
49 |
| - "fromfile", |
50 | 48 | "read_flo",
|
51 | 49 | "hint_sharding",
|
52 | 50 | ]
|
@@ -267,69 +265,6 @@ def _make_sharded_datapipe(root: str, dataset_size: int) -> IterDataPipe[Dict[st
|
267 | 265 | return dp
|
268 | 266 |
|
269 | 267 |
|
270 |
| -def _read_mutable_buffer_fallback(file: BinaryIO, count: int, item_size: int) -> bytearray: |
271 |
| - # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable |
272 |
| - return bytearray(file.read(-1 if count == -1 else count * item_size)) |
273 |
| - |
274 |
| - |
275 |
def fromfile(
    file: BinaryIO,
    *,
    dtype: torch.dtype,
    byte_order: str,
    count: int = -1,
) -> torch.Tensor:
    """Read a tensor from an open binary file.

    .. note::

        This is similar to :func:`numpy.fromfile`, with two differences:

        1. It only accepts an already-open binary file, never a path.
        2. It takes an explicit ``byte_order``, because PyTorch ``dtype``'s carry
           no endianness information.

    .. note::

        If ``file`` was opened in update mode, i.e. "r+b" or "w+b", the data can
        be memory-mapped instead of copied, which is much faster. While the file
        remains open, in-place operations on the returned tensor then write
        through to the file.

    Args:
        file (IO): Open binary file.
        dtype (torch.dtype): Element type of the stored data and of the returned tensor.
        byte_order (str): Byte order of the data; "little" or "big" endian.
        count (int): Number of elements to read. ``-1`` (default) reads the whole file.
    """
    # Build the equivalent numpy dtype string, e.g. "<f4" for little-endian float32.
    if dtype.is_floating_point:
        kind = "f"
        bits = torch.finfo(dtype).bits
    else:
        kind = "i" if dtype.is_signed else "u"
        bits = torch.iinfo(dtype).bits
    item_size = bits // 8
    np_dtype = ("<" if byte_order == "little" else ">") + kind + str(item_size)

    buffer: Union[memoryview, bytearray]
    use_mmap = platform.system() != "Windows"
    if use_mmap:
        # PyTorch cannot wrap read-only memory, so the zero-copy mmap fast path
        # only works when the file has a fileno(), was opened for updating
        # ('r+b'/'w+b'), and is seekable. Anything else drops to a plain read.
        try:
            buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :]
        except (PermissionError, io.UnsupportedOperation):
            use_mmap = False
        else:
            # Reading through the memoryview never moves the file cursor, so
            # advance it manually to mimic a real read.
            if count == -1:
                file.seek(0, io.SEEK_END)
            else:
                file.seek(count * item_size, io.SEEK_CUR)
    if not use_mmap:
        # On Windows, even attempting mmap.mmap() on an unsupported file may
        # corrupt its internal state so nothing can be read afterwards; the
        # speed-up is skipped there unconditionally. A plain file.read() gives
        # read-only bytes, so copy into a mutable bytearray.
        buffer = bytearray(file.read(-1 if count == -1 else count * item_size))

    # torch.frombuffer() only supports the system's native byte order, so decode
    # with np.frombuffer() in the stored order and convert to native via the
    # subsequent .astype() (copy=False avoids a copy when already native).
    array = np.frombuffer(buffer, dtype=np_dtype, count=count)
    return torch.from_numpy(array.astype(np_dtype[1:], copy=False))
331 |
| - |
332 |
| - |
333 | 268 | def read_flo(file: BinaryIO) -> torch.Tensor:
|
334 | 269 | if file.read(4) != b"PIEH":
|
335 | 270 | raise ValueError("Magic number incorrect. Invalid .flo file")
|
|
0 commit comments