diff --git a/datastore/__init__.py b/datastore/__init__.py index 794740b..ffcbaba 100644 --- a/datastore/__init__.py +++ b/datastore/__init__.py @@ -1,8 +1,46 @@ -__version__ = '0.3.6' -__author__ = 'Juan Batiz-Benet' -__email__ = 'juan@benet.ai' +""" +Datastore is a generic layer of abstraction for data store and database access. +It is a **simple** API with the aim to enable application development in a +datastore-agnostic way, allowing datastores to be swapped seamlessly without +changing application code. Thus, one can leverage different datastores with +different strengths without committing the application to one datastore +throughout its lifetime. +""" -from .core import * -from .filesystem import * +__version__ = "0.3.6" +__author__ = "Juan Batiz-Benet, Alexander Schlarb" +__email__ = "juan@benet.ai, alexander@ninetailed.ninja" +__all__ = ( + "Key", "Namespace", + "BinaryNullDatastore", "BinaryDictDatastore", + "ObjectNullDatastore", "ObjectDictDatastore", + "Query", "Cursor", + "SerializerAdapter", + + "abc", "typing", "util" +) + +# import core.key +from .core.key import Key +from .core.key import Namespace + +# import core.binarystore, core.objectstore +from .core.binarystore import NullDatastore as BinaryNullDatastore +from .core.binarystore import DictDatastore as BinaryDictDatastore + +from .core.objectstore import NullDatastore as ObjectNullDatastore +from .core.objectstore import DictDatastore as ObjectDictDatastore + +# import core.query +from .core.query import Query +from .core.query import Cursor + +# import core.serialize +from .core.serialize import SerializerAdapter + + +### Exposed submodules ### from . import abc +from . import typing +from . import util diff --git a/datastore/adapter/directory.py b/datastore/adapter/directory.py index e068ae6..9045aa2 100644 --- a/datastore/adapter/directory.py +++ b/datastore/adapter/directory.py @@ -46,7 +46,7 @@ async def directory(self, dir_key: datastore.Key, exist_ok: bool = False) -> boo try: await (await super().get(dir_key)).aclose() except KeyError: - await super()._put(dir_key, datastore.receive_channel_from([])) + await super()._put(dir_key, datastore.util.receive_channel_from([])) return True else: if not exist_ok: @@ -86,7 +86,7 @@ async def directory_add(self, dir_key: datastore.Key, key: datastore.Key, if key_str not in dir_items: dir_items.append(key_str) - await super()._put(dir_key, datastore.receive_channel_from(dir_items)) + await super()._put(dir_key, datastore.util.receive_channel_from(dir_items)) @typing.no_type_check @@ -112,7 +112,7 @@ async def directory_remove(self, dir_key: datastore.Key, key: datastore.Key, if not missing_ok: raise KeyError(f"{key} in {dir_key}") from None else: - await super()._put(dir_key, datastore.receive_channel_from(dir_items)) + await super()._put(dir_key, datastore.util.receive_channel_from(dir_items)) diff --git a/datastore/core/__init__.py b/datastore/core/__init__.py deleted file mode 100644 index 62f9bc5..0000000 --- a/datastore/core/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -datastore is a generic layer of abstraction for data store and database access. -It is a **simple** API with the aim to enable application development in a -datastore-agnostic way, allowing datastores to be swapped seamlessly without -changing application code. Thus, one can leverage different datastores with -different strengths without committing the application to one datastore -throughout its lifetime. 
-""" - -__version__ = '0.3.6' -__author__ = 'Juan Batiz-Benet, Alexander Schlarb' -__email__ = 'juan@benet.ai, alexander@ninetailed.ninja' - -# import key -from .key import Key -from .key import Namespace - -# import binarystore, objectstore -from .binarystore import NullDatastore as BinaryNullDatastore -from .binarystore import DictDatastore as BinaryDictDatastore - -from .objectstore import NullDatastore as ObjectNullDatastore -from .objectstore import DictDatastore as ObjectDictDatastore - -# import query -from .query import Query -from .query import Cursor - -# import serialize -from .serialize import SerializerAdapter - -# import util.stream -from .util.stream import receive_channel_from -from .util.stream import receive_stream_from diff --git a/datastore/core/test/conftest.py b/datastore/core/test/conftest.py index c0a0562..703b77b 100644 --- a/datastore/core/test/conftest.py +++ b/datastore/core/test/conftest.py @@ -35,11 +35,11 @@ def encode(self, value): def check_length(self, length: int) -> None: - try: - for sn in self.stores: - assert len(sn) == length - except TypeError: - pass + for sn in self.stores: + try: + assert len(sn) == length # type: ignore[arg-type] + except TypeError: + pass async def subtest_remove_nonexistent(self) -> None: @@ -60,11 +60,12 @@ async def subtest_remove_nonexistent(self) -> None: async def subtest_insert_elems(self) -> None: sn: DS + key: datastore.Key value: int # insert numelems elems for value in range(0, self.numelems): - key: datastore.Key = self.pkey.child(value) + key = self.pkey.child(value) for sn in self.stores: assert not await sn.contains(key) await sn.put(key, self.encode(value)) @@ -75,7 +76,7 @@ async def subtest_insert_elems(self) -> None: self.check_length(self.numelems) for value in range(0, self.numelems): - key: datastore.Key = self.pkey.child(value) + key = self.pkey.child(value) for sn in self.stores: assert await sn.contains(key) assert await sn.get_all(key) == self.encode(value) @@ -83,11 +84,12 @@ async def subtest_insert_elems(self) -> None: self.check_length(self.numelems) + @typing.no_type_check #FIXME: This method is broken async def check_query(self, query, total, slice) -> datastore.Cursor: assert not self.is_binary # Queries are only supported for object stores - allitems: List[int] = list(range(0, total)) - sn: datastore.ObjectDatastore + allitems: typing.List[int] = list(range(0, total)) + sn: datastore.abc.ObjectDatastore resultset: datastore.Cursor for sn in self.stores: @@ -110,11 +112,12 @@ async def check_query(self, query, total, slice) -> datastore.Cursor: return resultset + @typing.no_type_check #FIXME: This method is broken async def subtest_queries(self) -> None: if self.is_binary: return # Not supported on binary datastores - sn: datastore.ObjectDatastore + sn: datastore.abc.ObjectDatastore value: int for value in range(0, self.numelems): diff --git a/datastore/core/test/test_store.py b/datastore/core/test/test_store.py index 06229e5..6cbd854 100644 --- a/datastore/core/test/test_store.py +++ b/datastore/core/test/test_store.py @@ -1,15 +1,14 @@ import importlib import logging -import unittest from typing import Callable, List, Tuple, Type, TypeVar, Union, TYPE_CHECKING import pytest import trio.testing -from datastore.core import BinaryDictDatastore, ObjectDictDatastore -from datastore.core import BinaryNullDatastore, ObjectNullDatastore -from datastore.core.key import Key -from datastore.core.query import Query +from datastore import BinaryDictDatastore, ObjectDictDatastore +from datastore import 
+from datastore import Key
+from datastore import Query
 
 import datastore.adapter.logging
 
diff --git a/datastore/filesystem/__init__.py b/datastore/filesystem/__init__.py
index 54e1cbd..9352267 100644
--- a/datastore/filesystem/__init__.py
+++ b/datastore/filesystem/__init__.py
@@ -1,5 +1,6 @@
-__version__ = '1.2'
-__author__ = 'Juan Batiz-Benet '
+__version__ = "2.0"
+__author__ = "Juan Batiz-Benet, Alexander Schlarb"
+__email__ = "juan@benet.ai, alexander@ninetailed.ninja"
 
 __doc__ = """
 Filesystem datastore implementation
 """
diff --git a/datastore/filesystem/filesystem.py b/datastore/filesystem/filesystem.py
index f3e5c03..2a86007 100644
--- a/datastore/filesystem/filesystem.py
+++ b/datastore/filesystem/filesystem.py
@@ -1,8 +1,88 @@
+import errno
+import io
 import os
-
-from datastore.abc import BinaryDatastore
-
-
-class FileSystemDatastore(BinaryDatastore):
+import pathlib
+import typing
+
+import trio
+
+import datastore
+import datastore.abc
+
+from .util import statx
+
+
+# Make the default buffer larger to try to compensate for the thread-switching overhead
+DEFAULT_BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE * 10
+
+
+stat_result_t = typing.Union[os.stat_result, statx.stat_result]
+
+
+class FileReader(datastore.abc.ReceiveStream):
+	__slots__ = ("_file",)
+	
+	_file: 'trio._file_io.AsyncIOWrapper'
+	
+	
+	def __init__(self, file: 'trio._file_io.AsyncIOWrapper', stat: stat_result_t):
+		super().__init__()
+		
+		self._file = file
+		
+		self.size = stat.st_size
+		self.atime = stat.st_atime
+		self.mtime = stat.st_mtime
+		if stat.st_atime_ns:
+			self.atime = stat.st_atime_ns / 1_000_000_000
+		if stat.st_mtime_ns:
+			self.mtime = stat.st_mtime_ns / 1_000_000_000
+		
+		# Finding btime from stat is tricky and platform-dependent
+		st_birthtime_ns: typing.Optional[int] = getattr(stat, "st_birthtime_ns", None)
+		if st_birthtime_ns:
+			# Linux with the statx patch exposes this
+			self.btime = st_birthtime_ns / 1_000_000_000
+		elif hasattr(stat, "st_birthtime") and stat.st_birthtime:
+			# FreeBSD/macOS has this field
+			self.btime = stat.st_birthtime
+		elif os.name == "nt":  # Windows stores btime as ctime
+			self.btime = stat.st_ctime
+			if stat.st_ctime_ns:
+				self.btime = stat.st_ctime_ns / 1_000_000_000
+	
+	
+	async def receive_some(self, max_bytes: typing.Optional[int] = None) -> bytes:
+		if max_bytes:
+			buf = await self._file.read(max_bytes)
+		else:
+			buf = await self._file.read(DEFAULT_BUFFER_SIZE)
+		
+		if len(buf) == 0:
+			await self.aclose()
+		
+		return buf
+	
+	
+	async def aclose(self) -> None:
+		await self._file.aclose()
+	
+	
+	@classmethod
+	async def from_path(cls, filepath: typing.Union[str, bytes, os.PathLike]) -> 'FileReader':
+		# Open file
+		file = await trio.open_file(filepath, "rb")
+		try:
+			# Query file stat data
+			stat = await trio.run_sync_in_worker_thread(statx.stat, file.fileno(), cancellable=True)
+			
+			return cls(file, stat)
+		except BaseException:
+			await file.aclose()
+			raise
+
+
+class FileSystemDatastore(datastore.abc.BinaryDatastore):
 	"""Simple flat-file datastore.
 
 	FileSystemDatastore will store objects in independent files in the host's
@@ -28,17 +108,7 @@ class FileSystemDatastore(BinaryDatastore):
 
 	Implementation Notes:
 
-	Separating key namespaces (and their parameters) within directories allows
-	granular querying under a specific key.
-	For example, a query with key::
-
-	  Key('/data/Comedy:MontyPython/Sketch:CheeseShop')
-
-	will query for all objects under `Sketch:CheeseShop` independently of
-	queries for::
-
-	  Key('/data/Comedy:MontyPython/Sketch')
-
-	Also, using the `.obj` extension gets around the ambiguity of having both a
+	Using the `.obj` extension gets around the ambiguity of having both a
 	`CheeseShop` object and directory::
 
 	  /data/Comedy/MontyPython/Sketch/CheeseShop.obj
@@ -65,145 +135,208 @@ class FileSystemDatastore(BinaryDatastore):
 	"""
 
-	def __init__(self, root, case_sensitive=True):
+	case_sensitive: bool
+	object_extension: str
+	remove_empty: bool
+	root_path: pathlib.PurePath
+	
+	def __init__(self, root: typing.Union[os.PathLike, str], *,
+	             case_sensitive: bool = True, remove_empty: bool = True):
 		"""Initialize the datastore with given root directory `root`.
 
-		Args:
-			root: A path at which to mount this filesystem datastore.
+		Arguments
+		---------
+		root
+			A path at which to mount this filesystem datastore.
+		case_sensitive
+			Keep the case of all path items (True) or lower-case them (False)
+			before passing them to the OS. Note that, if the underlying
+			file system is case-insensitive, this option will not prevent
+			conflicts between paths that differ in case only.
+		remove_empty
+			Attempt to remove empty directories in the underlying file
+			system. While this is enabled, every successful delete operation
+			will be followed by at least one extra context switch to invoke
+			the `rmdir` system call.
 		"""
-		root = os.path.normpath(root)
-
 		if not root:
-			raise ValueError('root path must not be empty (\'.\' for current directory)')
-
-		os.makedirs(root, exist_ok=True)
+			raise ValueError('root path must not be empty (use \'.\' for current directory)')
+		
+		root = pathlib.Path(root)
+		root.mkdir(parents=True, exist_ok=True)
 
 		self.object_extension = '.obj'
-		self.ignore_list = list()
 		self.root_path = root
 		self.case_sensitive = bool(case_sensitive)
-
+		self.remove_empty = bool(remove_empty)
+	
+	
 	# object paths
-
-	def relative_path(self, key):
+	
+	
+	def relative_path(self, key: datastore.Key) -> pathlib.PurePath:
 		"""Returns the relative path for given `key`"""
-		key = str(key)  # stringify
-		key = key.replace(':', '/')  # turn namespace delimiters into slashes
-		key = key[1:]  # remove first slash (absolute)
+		skey = str(key)  # stringify
+		skey = skey.replace(':', '/')  # turn namespace delimiters into slashes
+		skey = skey[1:]  # remove first slash (absolute)
 		if not self.case_sensitive:
-			key = key.lower()  # coerce to lowercase
-		return os.path.normpath(key)
+			skey = skey.lower()  # coerce to lowercase
+		return pathlib.PurePath(skey)
 
-	def path(self, key):
+	def path(self, key: datastore.Key) -> pathlib.PurePath:
 		"""Returns the `path` for given `key`"""
-		return os.path.join(self.root_path, self.relative_path(key))
+		return self.root_path / self.relative_path(key)
 
-	def relative_object_path(self, key):
+	def relative_object_path(self, key: datastore.Key) -> pathlib.PurePath:
 		"""Returns the relative path for object pointed by `key`."""
-		return self.relative_path(key) + self.object_extension
+		return self.relative_path(key).with_suffix(self.object_extension)
 
-	def object_path(self, key):
+	def object_path(self, key: datastore.Key) -> pathlib.PurePath:
 		"""return the object path for `key`."""
-		return os.path.join(self.root_path, self.relative_object_path(key))
-
-	# object IO
-
-	@staticmethod
-	def _write_object(path, value):
-		"""write out `object` to file at `path`"""
-		os.makedirs(os.path.dirname(path), exist_ok=True)
-
-		with open(path, 'w') as f:
-			f.write(value)
-
-	@staticmethod
-	def _read_object(path):
-		"""read in object from file at `path`"""
-		if not os.path.exists(path):
-			return None
-
-		if os.path.isdir(path):
-			raise RuntimeError('%s is a directory, not a file.' % path)
-
-		with open(path) as f:
-			file_contents = f.read()
-
-		return file_contents
-
-	@staticmethod
-	def _read_object_gen(iterable):
-		"""Generator that reads objects in from filenames in `iterable`."""
-		for filename in iterable:
-			yield FileSystemDatastore._read_object(filename)
-
+		return self.root_path / self.relative_object_path(key)
+	
+	
 	# Datastore implementation
-
-	def get(self, key):
-		"""Return the object named by key, or None, if it does not exist.
-
-		Args:
-			key: Key naming the object to retrieve
-
-		Returns:
-			object or None
+	
+	
+	async def get(self, key: datastore.Key) -> datastore.abc.ReceiveStream:
+		"""Returns the data named by key, or raises KeyError otherwise.
+		
+		It is suggested to read larger chunks of the returned stream to reduce
+		the overhead of doing a context switch for each system call.
+		
+		Arguments
+		---------
+		key
+			Key naming the data to retrieve
+		
+		Raises
+		------
+		KeyError
+			The given object was not present in this datastore
+		RuntimeError
+			The given ``key`` names a subtree, not a value
 		"""
 		path = self.object_path(key)
-		return FileSystemDatastore._read_object(path)
-
-	def put(self, key, value):
-		"""Stores the object `value` named by `key`.
-
-		Args:
-			key: Key naming `value`
-			value: the object to store.
+		try:
+			return await FileReader.from_path(path)
+		except FileNotFoundError as exc:
+			raise KeyError(key) from exc
+		except IsADirectoryError as exc:
+			# Should hopefully only happen if `object_extension` is `""`
+			raise RuntimeError(f"Key '{key}' names a subtree, not a value") from exc
+	
+	
+	async def get_all(self, key: datastore.Key) -> bytes:
+		"""Returns all the data named by `key` at once, or raises `KeyError`
+		otherwise
+		
+		This is an optimization over :meth:`get` for smaller files, as it entails
+		only one context switch to open, read and close the file, rather than
+		several.
+		
+		Arguments
+		---------
+		key
+			Key naming the data to retrieve
+		
+		Raises
+		------
+		KeyError
+			The given object was not present in this datastore
+		RuntimeError
+			The given ``key`` names a subtree, not a value
 		"""
-		path = self.object_path(key)
-		FileSystemDatastore._write_object(path, value)
-
-	def delete(self, key):
-		"""Removes the object named by `key`.
-
-		Args:
-			key: Key naming the object to remove.
+		path = trio.Path(self.object_path(key))
+		try:
+			return await path.read_bytes()
+		except FileNotFoundError as exc:
+			raise KeyError(key) from exc
+		except IsADirectoryError as exc:
+			# Should hopefully only happen if `object_extension` is `""`
+			raise RuntimeError(f"Key '{key}' names a subtree, not a value") from exc
+	
+	
+	async def _put(self, key: datastore.Key, value: datastore.abc.ReceiveStream) -> None:
+		"""Stores or replaces the data named by `key` with `value`
+		
+		Arguments
+		---------
+		key
+			Key naming the binary data slot to store at
+		value
+			Some stream yielding the data to store
+		
+		Raises
+		------
+		RuntimeError
+			The given ``key`` names a subtree, not a value, or one of the
+			items along the key path is itself a value
 		"""
-		path = self.object_path(key)
-		if os.path.exists(path):
-			os.remove(path)
-
-		# TODO: delete dirs if empty?
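To make the new read paths concrete, here is a minimal usage sketch. The store path is hypothetical, a trio event loop is assumed, and it is assumed that the public `put()` wraps plain `bytes` through the stream-conversion helpers this diff re-exports as `datastore.util`:

```python
# Hypothetical usage sketch, not part of this patch.
import trio
import datastore
from datastore.filesystem import FileSystemDatastore

async def main() -> None:
	fs = FileSystemDatastore("/tmp/fs-demo")  # hypothetical root directory
	key = datastore.Key("/example")           # maps to <root>/example.obj on disk
	await fs.put(key, b"x" * (1 << 20))       # assumes bytes are auto-wrapped

	# Streaming read: larger chunks amortize the per-read thread switch
	stream = await fs.get(key)                # a FileReader; closes itself at EOF
	total = 0
	chunk = await stream.receive_some(64 * 1024)
	while chunk:
		total += len(chunk)
		chunk = await stream.receive_some(64 * 1024)

	# One-shot read: a single context switch, best for small files
	assert total == len(await fs.get_all(key))

trio.run(main)
```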
-
-	def query(self, query):
-		"""Returns an iterable of objects matching criteria expressed in `query`.
-
-		FSDatastore.query queries all the `.obj` files within the directory
-		specified by the query.key.
-
-		Args:
-			query: Query object describing the objects to return.
-
-		Returns:
-			Cursor with all objects matching criteria
-		"""
-		path = self.path(query.key)
-
-		if os.path.exists(path):
-			filenames = os.listdir(path)
-			filenames = list(set(filenames) - set(self.ignore_list))
-			filenames = map(lambda f: os.path.join(path, f), filenames)
-			iterable = FileSystemDatastore._read_object_gen(filenames)
-		else:
-			iterable = list()
-
-		return query(iterable)  # must apply filters, etc naively.
-
-	def contains(self, key):
-		"""Returns whether the object named by `key` exists.
-
-		Optimized to only check whether the file object exists.
-
-		Args:
-			key: Key naming the object to check.
-
-		Returns:
-			boolean whether the object exists
+		path = trio.Path(self.object_path(key))
+		
+		# Ensure containing directory exists
+		parent = path.parent
+		try:
+			await parent.mkdir(parents=True, exist_ok=True)
+		except FileExistsError as exc:
+			# Should hopefully only happen if `object_extension` is `""`
+			raise RuntimeError(f"Key '{key}' requires containing directory "
+			                   f"'{parent}' to not be a value") from exc
+		
+		try:
+			async with await trio.open_file(path, "wb") as file:
+				chunk = await value.receive_some(DEFAULT_BUFFER_SIZE)
+				while chunk:
+					await file.write(chunk)
+					
+					chunk = await value.receive_some(DEFAULT_BUFFER_SIZE)
+		except IsADirectoryError as exc:
+			# Should only happen if `object_extension` is `""`
+			raise RuntimeError(f"Key '{key}' names a subtree, not a value") from exc
+	
+	
+	async def delete(self, key: datastore.Key) -> None:
+		"""Removes the data named by `key`
+		
+		Arguments
+		---------
+		key
+			Key naming the binary data slot to remove
+		
+		Raises
+		------
+		KeyError
+			The given object was not present in this datastore
 		"""
-		path = self.object_path(key)
-		return os.path.exists(path) and os.path.isfile(path)
+		path = trio.Path(self.object_path(key))
+		
+		try:
+			await path.unlink()
+		except FileNotFoundError as exc:
+			raise KeyError(key) from exc
+		
+		# Try to remove parent directories if they are empty
+		if not self.remove_empty:
+			return
+		try:
+			parent = path.parent
+			# Attempt to remove all parent directories as long as the
+			# parent directory is:
+			#  * … a sub-directory of `self.root_path` – checking whether
+			#    the path of that directory starts with `{self.root_path}/`.
+			#  * … not the same directory again – to ensure that pathlib's
+			#    special `Path(".").parent == Path(".")` behaviour doesn't
+			#    bite us. (This check may be unnecessary / overly pedantic…)
+			# The loop is stopped when we either reach the root directory
+			# or receive an `ENOTEMPTY` error indicating that we tried to
+			# remove a directory that wasn't actually empty.
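+			# (Illustration with a hypothetical root "/srv/ds": after deleting
+			#  "/srv/ds/a/b/c.obj" the loop visits "/srv/ds/a/b", then
+			#  "/srv/ds/a", removing each directory while it is empty, and
+			#  stops before touching "/srv/ds" itself.)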
+			while str(parent).startswith(str(self.root_path) + os.path.sep) \
+			      and parent.parent != parent:
+				await parent.rmdir()
+				
+				parent = parent.parent
+		except OSError as exc:
+			if exc.errno == errno.ENOTEMPTY:
+				return
+			raise
diff --git a/datastore/filesystem/test_filesystem.py b/datastore/filesystem/test_filesystem.py
index 003127b..0718913 100644
--- a/datastore/filesystem/test_filesystem.py
+++ b/datastore/filesystem/test_filesystem.py
@@ -1,31 +1,26 @@
-import os
-import shutil
-import unittest
+import tempfile
+import os.path
 
-from datastore.core import serialize
-from datastore.core.test.test_basic import TestDatastore
+import pytest
+import trio.testing
 
+from datastore.core.test.conftest import DatastoreTests
 from datastore.filesystem import FileSystemDatastore
 
 
-class TestFileSystemDatastore(TestDatastore):
-	tmp = os.path.normpath('/tmp/datastore.test.fs')
+@pytest.fixture
+def temp_path():
+	with tempfile.TemporaryDirectory() as temp_path:
+		yield temp_path
 
-	def setUp(self):
-		if os.path.exists(self.tmp):
-			shutil.rmtree(self.tmp)
 
-	def tearDown(self):
-		if os.path.exists(self.tmp):
-			shutil.rmtree(self.tmp)
-
-	def test_datastore(self):
-		dirs = map(str, range(0, 4))
-		dirs = map(lambda d: os.path.join(self.tmp, d), dirs)
-		fses = map(FileSystemDatastore, dirs)
-		dses = list(map(serialize.shim, fses))
-		self.subtest_simple(dses, numelems=500)
-
-
-if __name__ == '__main__':
-	unittest.main()
+@trio.testing.trio_test
+async def test_datastore(temp_path):
+	dirs = map(str, range(0, 4))
+	dirs = map(lambda d: os.path.join(temp_path, d), dirs)
+	fses = list(map(FileSystemDatastore, dirs))
+	await DatastoreTests(fses).subtest_simple()
+	
+	# Check that all items were cleaned up
+	for fs in fses:
+		assert os.listdir(fs.root_path) == []
diff --git a/datastore/filesystem/util/statx.py b/datastore/filesystem/util/statx.py
new file mode 100644
index 0000000..dc99872
--- /dev/null
+++ b/datastore/filesystem/util/statx.py
@@ -0,0 +1,267 @@
+import collections
+import ctypes
+import ctypes.util
+import errno
+import typing
+import os
+
+
+class Mask(ctypes.c_uint):
+	# Basic stats (stuff also part of `os.stat()`)
+	TYPE        = 0x00000001  # Want/got stx_mode & S_IFMT
+	MODE        = 0x00000002  # Want/got stx_mode & ~S_IFMT
+	NLINK       = 0x00000004  # Want/got stx_nlink
+	UID         = 0x00000008  # Want/got stx_uid
+	GID         = 0x00000010  # Want/got stx_gid
+	ATIME       = 0x00000020  # Want/got stx_atime
+	MTIME       = 0x00000040  # Want/got stx_mtime
+	CTIME       = 0x00000080  # Want/got stx_ctime
+	INO         = 0x00000100  # Want/got stx_ino
+	SIZE        = 0x00000200  # Want/got stx_size
+	BLOCKS      = 0x00000400  # Want/got stx_blocks
+	BASIC_STATS = 0x000007FF  # The stuff in the normal stat struct
+	
+	# Extensions
+	BTIME       = 0x00000800  # Want/got stx_btime
+	ALL         = 0x00000FFF  # All currently supported flags
+	_RESERVED   = 0x80000000  # Reserved for future struct statx expansion
+
+
+# Special FD value meaning “no FD”
+AT_FDCWD = -100
+
+# Path lookup flags applicable for `statx`
+AT_SYMLINK_NOFOLLOW = 0x100   # Do not resolve symbolic links
+AT_REMOVEDIR        = 0x200   # Remove directory instead of unlinking file
+AT_NO_AUTOMOUNT     = 0x800   # Suppress terminal automount traversal
+AT_EMPTY_PATH       = 0x1000  # Allow empty relative pathname
+
+# Accuracy of timestamps required in case of network file systems
+AT_STATX_SYNC_TYPE    = 0x6000  # Type of synchronisation required from statx():
+AT_STATX_SYNC_AS_STAT = 0x0000  # - Do whatever stat() does
+AT_STATX_FORCE_SYNC   = 0x2000  # - Force the attributes to be sync'd with the server
+AT_STATX_DONT_SYNC    = 0x4000  # - Don't sync attributes with the server
+
+
+class struct_statx_timestamp(ctypes.Structure):
+	_fields_ = [
+		# Seconds and nanoseconds parts of the timestamp
+		("tv_sec", ctypes.c_uint64),
+		("tv_nsec", ctypes.c_uint32),
+		("__reserved", ctypes.c_uint32),
+	]
+
+
+class struct_statx(ctypes.Structure):
+	_fields_ = [
+		# Base file attributes
+		("stx_mask", Mask),
+		("stx_blksize", ctypes.c_uint32),
+		("stx_attributes", ctypes.c_uint64),
+		("stx_nlink", ctypes.c_uint32),
+		("stx_uid", ctypes.c_uint32),
+		("stx_gid", ctypes.c_uint32),
+		("stx_mode", ctypes.c_uint16),
+		("__spare0", ctypes.c_uint16 * 1),
+		("stx_ino", ctypes.c_uint64),
+		("stx_size", ctypes.c_uint64),
+		("stx_blocks", ctypes.c_uint64),
+		("stx_attributes_mask", ctypes.c_uint64),
+		
+		# Timestamps
+		("stx_atime", struct_statx_timestamp),
+		("stx_btime", struct_statx_timestamp),
+		("stx_ctime", struct_statx_timestamp),
+		("stx_mtime", struct_statx_timestamp),
+		
+		# Device ID (if device file)
+		("stx_rdev_major", ctypes.c_uint32),
+		("stx_rdev_minor", ctypes.c_uint32),
+		("stx_dev_major", ctypes.c_uint32),
+		("stx_dev_minor", ctypes.c_uint32),
+		
+		# Spare space
+		("__spare2", ctypes.c_uint64 * 14),
+	]
+
+
+assert ctypes.sizeof(struct_statx) == 0x100
+
+
+
+# Only works on Linux with GLibC afaik
+_func: typing.Optional[typing.Any]
+if os.name == "posix" and os.uname().sysname == "Linux":
+	try:
+		_libc = ctypes.CDLL("libc.so.6", use_errno=True)
+		try:
+			_error = None
+			_func = _libc.statx
+			_func.argtypes = (
+				ctypes.c_int,     # dirfd
+				ctypes.c_char_p,  # pathname
+				ctypes.c_int,     # flags
+				ctypes.c_uint,    # mask
+				ctypes.POINTER(struct_statx)
+			)
+		except AttributeError:  # Probably not GLibC 2.28+
+			_error = NotImplementedError("statx: C library does not expose symbol 'statx'")
+			_func = None
+	except OSError:
+		_error = NotImplementedError("statx: No C library found at name 'libc.so.6'")
+		_func = None
+else:
+	_error = NotImplementedError("statx: System call is Linux-specific")
+	_func = None
+
+
+
+# We have to define our own `stat_result` here as there is no way to add fields
+# to `os.stat_result` unless Python thinks they should be there
+_stat_result = collections.namedtuple("stat_result", [
+	# Standard attributes
+	"st_mode",
+	"st_ino",
+	"st_dev",
+	"st_nlink",
+	"st_uid",
+	"st_gid",
+	"st_size",
+	"st_atime",
+	"st_mtime",
+	"st_ctime",
+	
+	# Platform-dependent attributes
+	"st_blksize",
+	"st_blocks",
+	"st_rdev",
+	"st_flags",
+	
+	# High-precision timestamps
+	"st_atime_ns",
+	"st_mtime_ns",
+	"st_ctime_ns",
+	
+	# Birthtime extension (otherwise only available on FreeBSD/macOS)
+	"st_birthtime",
+	"st_birthtime_ns"
+], defaults=[None, None, None, None, None, None, None, None, None])
+
+
+class stat_result(_stat_result):
+	def __repr__(self):
+		return (f"{self.__module__}.{type(self).__qualname__}("
+		        f"st_mode={self.st_mode!r}, "
+		        f"st_ino={self.st_ino!r}, "
+		        f"st_dev={self.st_dev!r}, "
+		        f"st_nlink={self.st_nlink!r}, "
+		        f"st_uid={self.st_uid!r}, "
+		        f"st_gid={self.st_gid!r}, "
+		        f"st_size={self.st_size!r}, "
+		        f"st_atime={self.st_atime!r}, "
+		        f"st_mtime={self.st_mtime!r}, "
+		        f"st_ctime={self.st_ctime!r})")
+
+
+
+def statx(
+		dirfd: int = AT_FDCWD,
+		pathname: bytes = b"",
+		flags: int = AT_STATX_SYNC_AS_STAT,
+		mask: int = Mask.BASIC_STATS
+) -> struct_statx:
+	"""Low-level wrapper around the ``statx(2)`` Linux system call"""
+	global _error
+	if _error:
+		raise _error
+	assert _func
+	
+	statx_data = struct_statx()
+	
+	result = _func(dirfd, pathname, flags, mask, ctypes.byref(statx_data))
+	if result < 0:
+		if ctypes.get_errno() == errno.ENOSYS:  # Kernel does not support syscall
+			_error = NotImplementedError("statx: System call not supported by this version of Linux")
+			raise _error
+		raise OSError(ctypes.get_errno(), os.strerror(ctypes.get_errno()))
+	
+	return statx_data
+
+
+def stat(path, *, dir_fd: typing.Optional[int] = None, follow_symlinks: bool = True) \
+    -> typing.Union[os.stat_result, stat_result]:
+	"""High-level wrapper around the ``statx(2)`` system call that delegates
+	to :func:`os.stat` on other platforms, but provides `st_birthtime` on Linux."""
+	def ts_to_nstime(ts):
+		return ts.tv_sec * 1_000_000_000 + ts.tv_nsec
+	
+	if not _error:
+		try:
+			stx_flags = AT_STATX_SYNC_AS_STAT
+			
+			if isinstance(path, int):
+				stx_dirfd = path
+				stx_path = b""
+				stx_flags |= AT_EMPTY_PATH
+			else:
+				stx_dirfd = dir_fd if dir_fd is not None else AT_FDCWD
+				stx_path = os.fsencode(os.fspath(path))
+			
+			if not follow_symlinks:
+				stx_flags |= AT_SYMLINK_NOFOLLOW
+			
+			stx_result = statx(stx_dirfd, stx_path, stx_flags, Mask.BASIC_STATS | Mask.BTIME)
+			assert (~stx_result.stx_mask.value & (Mask.BASIC_STATS & ~Mask.BLOCKS)) == 0
+			
+			st_blocks = None
+			st_birthtime = None
+			st_birthtime_ns = None
+			if stx_result.stx_mask.value & Mask.BLOCKS:
+				st_blocks = stx_result.stx_blocks
+			if stx_result.stx_mask.value & Mask.BTIME:
+				st_birthtime = stx_result.stx_btime.tv_sec
+				st_birthtime_ns = ts_to_nstime(stx_result.stx_btime)
+			
+			# Field values must match the declaration order of `_stat_result`
+			return stat_result(
+				# Standard struct data
+				stx_result.stx_mode,
+				stx_result.stx_ino,
+				os.makedev(stx_result.stx_dev_major, stx_result.stx_dev_minor),
+				stx_result.stx_nlink,
+				stx_result.stx_uid,
+				stx_result.stx_gid,
+				stx_result.stx_size,
+				stx_result.stx_atime.tv_sec,
+				stx_result.stx_mtime.tv_sec,
+				stx_result.stx_ctime.tv_sec,
+				
+				# Extended (platform-dependent) attributes
+				stx_result.stx_blksize,
+				st_blocks,
+				os.makedev(stx_result.stx_rdev_major, stx_result.stx_rdev_minor),
+				stx_result.stx_attributes,
+				
+				# High-precision timestamps
+				ts_to_nstime(stx_result.stx_atime),
+				ts_to_nstime(stx_result.stx_mtime),
+				ts_to_nstime(stx_result.stx_ctime),
+				
+				# Non-standard birth time value
+				st_birthtime,
+				st_birthtime_ns
+			)
+		except NotImplementedError:
+			pass
+	
+	return os.stat(path, dir_fd=dir_fd, follow_symlinks=follow_symlinks)
+
+
+def lstat(path, *, dir_fd=None):
+	"""Alias for ``stat(…, follow_symlinks=False)``."""
+	return stat(path, dir_fd=dir_fd, follow_symlinks=False)
+
+
+def fstat(fd):
+	"""Alias for ``stat(fd)``."""
+	return stat(fd)
diff --git a/datastore/typing.py b/datastore/typing.py
new file mode 100644
index 0000000..c2b1581
--- /dev/null
+++ b/datastore/typing.py
@@ -0,0 +1,2 @@
+from .core.util.stream import ArbitraryReceiveChannel
+from .core.util.stream import ArbitraryReceiveStream
diff --git a/datastore/util.py b/datastore/util.py
new file mode 100644
index 0000000..7f10941
--- /dev/null
+++ b/datastore/util.py
@@ -0,0 +1,2 @@
+from .core.util.stream import receive_channel_from
+from .core.util.stream import receive_stream_from
diff --git a/tox.ini b/tox.ini
index 037fab3..5fd9bd4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -36,7 +36,7 @@ passenv = TERM
 
 #HACK: We cannot use mypy's optimized (mypyc) binary distribution as that breaks
 #      the monkey patching in the `./mypy_run.py` script…
-install_command=python -m pip install --no-binary {opts} {packages}
+install_command=python -m pip install --no-binary mypy {opts} {packages}
 
 [flake8]
 exclude = .git,.tox,+junk,coverage,dist,doc,*egg,build,tools,test/unit,docs,*__init__.py
@@ -60,4 +60,4 @@ python_files =
 addopts =
 ;	--doctest-modules
 	datastore/core/test/
-;	datastore/filesystem/
+	datastore/filesystem/
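With `datastore.core` no longer part of the public surface, the flattened top-level API introduced at the start of this diff reads as below. This is a sketch under the assumption that the async object-store `put()` accepts any `ArbitraryReceiveChannel`-compatible input (plain lists included) via the `datastore.util` conversion helpers:

```python
# Hypothetical usage sketch, not part of this patch.
import trio
import datastore

async def main() -> None:
	store = datastore.ObjectDictDatastore()
	key = datastore.Key("/comedy/monty-python")
	await store.put(key, ["spam", "eggs"])  # assumes lists are auto-wrapped
	assert await store.get_all(key) == ["spam", "eggs"]

trio.run(main)
```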
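The new `statx` shim is self-contained enough to exercise directly: `stat()` silently falls back to `os.stat()` off Linux (and on kernels older than 4.11, which lack the system call), so `st_birthtime` has to be probed defensively. A small sketch, with an illustrative file name:

```python
# Hypothetical usage sketch, not part of this patch.
from datastore.filesystem.util import statx

info = statx.stat("README.md")  # statx.stat_result on Linux, os.stat_result elsewhere
birthtime = getattr(info, "st_birthtime", None)  # may be None if the FS lacks btime
print(info.st_size, info.st_mtime, birthtime)
```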