diff --git a/imas/ids_factory.py b/imas/ids_factory.py index cd88952..02f4dd4 100644 --- a/imas/ids_factory.py +++ b/imas/ids_factory.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Tools for generating IDSs from a Data Dictionary version. -""" +"""Tools for generating IDSs from a Data Dictionary version.""" import logging from functools import partial @@ -9,7 +8,7 @@ from imas import dd_zip from imas.exception import IDSNameError -from imas.ids_toplevel import IDSToplevel +from imas.ids_toplevel import IDSToplevel, LazyIDSToplevel logger = logging.getLogger(__name__) @@ -93,7 +92,9 @@ def new(self, ids_name: str, *, _lazy: bool = False) -> IDSToplevel: """ if ids_name not in self._ids_elements: raise IDSNameError(ids_name, self) - return IDSToplevel(self, self._ids_elements[ids_name], _lazy) + if _lazy: + return LazyIDSToplevel(self, self._ids_elements[ids_name]) + return IDSToplevel(self, self._ids_elements[ids_name]) def exists(self, ids_name: str) -> bool: """Check if an IDS type with the given name exists.""" diff --git a/imas/ids_metadata.py b/imas/ids_metadata.py index 4d2d5db..f009713 100644 --- a/imas/ids_metadata.py +++ b/imas/ids_metadata.py @@ -1,7 +1,7 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Core of the IMAS-Python interpreted IDS metadata -""" +"""Core of the IMAS-Python interpreted IDS metadata""" + import re import types from enum import Enum @@ -79,6 +79,9 @@ def get_toplevel_metadata(structure_xml: Element) -> "IDSMetadata": _type_map: Dict[Tuple[IDSDataType, int], Type] = {} """Map of IDSDataType and ndim to IDSBase implementation class.""" +_lazy_types: Dict[Type, Type] = {} +"""Maps IDSStructure, IDSStructArray and IDSToplevel to their lazy-loaded counterparts. +""" def _build_type_map(): @@ -94,9 +97,9 @@ def _build_type_map(): IDSString0D, IDSString1D, ) - from imas.ids_struct_array import IDSStructArray - from imas.ids_structure import IDSStructure - from imas.ids_toplevel import IDSToplevel + from imas.ids_struct_array import IDSStructArray, LazyIDSStructArray + from imas.ids_structure import IDSStructure, LazyIDSStructure + from imas.ids_toplevel import IDSToplevel, LazyIDSToplevel _type_map[(None, 0)] = IDSToplevel _type_map[(IDSDataType.STRUCTURE, 0)] = IDSStructure @@ -111,6 +114,10 @@ def _build_type_map(): _type_map[(IDSDataType.FLT, dim)] = IDSNumericArray _type_map[(IDSDataType.CPX, dim)] = IDSNumericArray + _lazy_types[IDSStructArray] = LazyIDSStructArray + _lazy_types[IDSStructure] = LazyIDSStructure + _lazy_types[IDSToplevel] = LazyIDSToplevel + class IDSMetadata: """Container for IDS Metadata stored in the Data Dictionary. @@ -254,6 +261,7 @@ def __init__( # Cache node type self._node_type: Type = _type_map[self.data_type, self.ndim] + self._lazy_node_type: Type = _lazy_types.get(self._node_type, self._node_type) # AL expects ndim of STR types to be one more (STR_0D is 1D array of chars) self._al_ndim = self.ndim + (self.data_type is IDSDataType.STR) diff --git a/imas/ids_struct_array.py b/imas/ids_struct_array.py index b176864..f36720d 100644 --- a/imas/ids_struct_array.py +++ b/imas/ids_struct_array.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""IDS StructArray represents an Array of Structures in the IDS tree. -""" +"""IDS StructArray represents an Array of Structures in the IDS tree.""" import logging from copy import deepcopy @@ -26,7 +25,7 @@ class IDSStructArray(IDSBase): """ __doc__ = IDSDoc(__doc__) - __slots__ = ["_parent", "_lazy", "metadata", "value", "_lazy_ctx"] + __slots__ = ["_parent", "metadata", "value"] def __init__(self, parent: IDSBase, metadata: IDSMetadata): """Initialize IDSStructArray from XML specification @@ -37,17 +36,10 @@ def __init__(self, parent: IDSBase, metadata: IDSMetadata): metadata: IDSMetadata describing the structure of the IDS """ self._parent = parent - self._lazy = parent._lazy self.metadata = metadata - - # Initialize with an 0-length list or None when lazy loading - self.value = None if self._lazy else [] + self.value = [] """""" - # Lazy loading context, only applicable when self._lazy is True - # When lazy loading, all items in self.value are None until they are requested - self._lazy_ctx: Optional[LazyALArrayStructContext] = None - @property def coordinates(self): """Coordinates of this array of structures.""" @@ -69,49 +61,6 @@ def __eq__(self, other) -> bool: # Equal if same size and all contained structures are the same return len(self) == len(other) and all(a == b for a, b in zip(self, other)) - def _set_lazy_context(self, ctx: LazyALArrayStructContext) -> None: - """Called by DBEntry during a lazy get/get_slice. - - Set the context that we can use for retrieving our size and children. - """ - self._lazy_ctx = ctx - - def _load(self, item: Optional[int]) -> None: - """When lazy loading, ensure that the requested item is loaded. - - Args: - item: index of the item to load. When None, just ensure that our size is - loaded from the lowlevel. - """ - assert self._lazy - if self.value is not None: # We already loaded our size - if item is None: - return - if self.value[item] is not None: - return # item is already loaded - # Load requested data from the backend - if self.value is None: - if self._lazy_ctx is None: - # Lazy context can be None when: - # 1. The element does not exist in the on-disk DD version - # 2. The element exists, but changed type compared to the on-disk DD - # In both cases we just report that we're empty - self.value = [] - else: - ctx = self._lazy_ctx.get_context() - self.value = [None] * ctx.size - - if item is not None: - if item < 0: - item += len(self) - if item < 0 or item >= len(self): - raise IndexError("list index out of range") - # Create the requested item - from imas.ids_structure import IDSStructure - - element = self.value[item] = IDSStructure(self, self.metadata) - element._set_lazy_context(self._lazy_ctx.iterate_to_index(item)) - @property def _element_structure(self): """Prepare an element structure JIT""" @@ -124,15 +73,11 @@ def __getitem__(self, item): # value is a list, so the given item should be convertable to integer # TODO: perhaps we should allow slices as well? list_idx = int(item) - if self._lazy: - self._load(item) return self.value[list_idx] def __setitem__(self, item, value): # value is a list, so the given item should be convertable to integer # TODO: perhaps we should allow slices as well? - if self._lazy: - raise ValueError("Lazy-loaded IDSs are read-only.") list_idx = int(item) if isinstance(value, (IDSIdentifier, str, int)): self.value[list_idx]._assign_identifier(value) @@ -140,8 +85,6 @@ def __setitem__(self, item, value): self.value[list_idx] = value def __len__(self) -> int: - if self._lazy: - self._load(None) return len(self.value) @property @@ -150,8 +93,6 @@ def shape(self) -> Tuple[int]: This will always return a tuple: ``(len(self), )``. """ - if self._lazy: - self._load(None) return (len(self.value),) def append(self, elt): @@ -160,8 +101,6 @@ def append(self, elt): Args: elt: IDS structure, or list of IDS structures, to append to this array """ - if self._lazy: - raise ValueError("Lazy-loaded IDSs are read-only.") if not isinstance(elt, list): elements = [elt] else: @@ -185,8 +124,6 @@ def resize(self, nbelt: int, keep: bool = False): keep: Specifies if the targeted array of structure should keep existing data in remaining elements after resizing it. """ - if self._lazy: - raise ValueError("Lazy-loaded IDSs are read-only.") if nbelt < 0: raise ValueError(f"Invalid size {nbelt}: size may not be negative") if not keep: @@ -231,3 +168,83 @@ def _xxhash(self) -> bytes: for s in self: hsh.update(s._xxhash()) return hsh.digest() + + +class LazyIDSStructArray(IDSStructArray): + __slots__ = ["_lazy_context"] + _lazy = True + + def __init__(self, parent, metadata): + super().__init__(parent, metadata) + # self.value set to None to indicate that we don't know our size yet. It will + # be set to a list when needed (with values equal to None when not yet loaded): + self.value = None + self._lazy_context: Optional[LazyALArrayStructContext] = None + + def __deepcopy__(self, memo): + raise NotImplementedError("deepcopy is not implemented for lazy-loaded IDSs.") + + def _set_lazy_context(self, ctx: LazyALArrayStructContext) -> None: + """Called by DBEntry during a lazy get/get_slice. + + Set the context that we can use for retrieving our size and children. + """ + self._lazy_context = ctx + + def _load(self, item: Optional[int]) -> None: + """Ensure that the requested item is loaded. + + Args: + item: index of the item to load. When None, just ensure that our size is + loaded from the lowlevel. + """ + if self.value is not None: # We already loaded our size + if item is None or self.value[item] is not None: + return + # Load requested data from the backend + if self.value is None: + if self._lazy_context is None: + # Lazy context can be None when: + # 1. The element does not exist in the on-disk DD version + # 2. The element exists, but changed type compared to the on-disk DD + # In both cases we just report that we're empty + self.value = [] + else: + ctx = self._lazy_context.get_context() + self.value = [None] * ctx.size + + if item is not None: + if item < 0: + item += len(self) + if item < 0 or item >= len(self): + raise IndexError("list index out of range") + # Create the requested item + from imas.ids_structure import LazyIDSStructure + + element = self.value[item] = LazyIDSStructure(self, self.metadata) + element._set_lazy_context(self._lazy_context.iterate_to_index(item)) + + def __getitem__(self, item): + # value is a list, so the given item should be convertable to integer + # TODO: perhaps we should allow slices as well? + list_idx = int(item) + self._load(item) + return self.value[list_idx] + + def __setitem__(self, item, value): + raise ValueError("Lazy-loaded IDSs are read-only.") + + def __len__(self) -> int: + self._load(None) + return len(self.value) + + @property + def shape(self) -> Tuple[int]: + self._load(None) + return (len(self.value),) + + def append(self, elt): + raise ValueError("Lazy-loaded IDSs are read-only.") + + def resize(self, nbelt: int, keep: bool = False): + raise ValueError("Lazy-loaded IDSs are read-only.") diff --git a/imas/ids_structure.py b/imas/ids_structure.py index 2727003..c8e297c 100644 --- a/imas/ids_structure.py +++ b/imas/ids_structure.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""A structure in an IDS -""" +"""A structure in an IDS""" import logging from copy import deepcopy @@ -34,38 +33,29 @@ class IDSStructure(IDSBase): __doc__ = IDSDoc(__doc__) _children: "MappingProxyType[str, IDSMetadata]" - _lazy_context: Optional["LazyALContext"] + _lazy = False def __init__(self, parent: IDSBase, metadata: IDSMetadata): """Initialize IDSStructure from metadata specification Args: - parent: Parent structure. Can be anything, but at database write - time should be something with a path attribute + parent: Parent structure or structarray metadata: IDSMetadata describing the structure of the IDS """ # Performance hack: bypass our __setattr__ implementation during __init__: dct = self.__dict__ dct["_parent"] = parent - # parent._lazy is undefined for IDSToplevel, but then _lazy is already set - if "_lazy" not in dct: - dct["_lazy"] = parent._lazy dct["metadata"] = metadata dct["_children"] = metadata._children - dct["_lazy_context"] = None def __getattr__(self, name): if name not in self._children: - raise AttributeError( - f"IDS structure '{self._path}' has no attribute '{name}'" - ) + raise AttributeError(f"'{self!r}' has no attribute '{name}'") # Create child node child_meta = self._children[name] child = child_meta._node_type(self, child_meta) self.__dict__[name] = child # bypass setattr logic below: avoid recursion - if self._lazy and self._lazy_context is not None: # lazy load the child - self._lazy_context.get_child(child) return child def _assign_identifier(self, value: Union[IDSIdentifier, str, int]) -> None: @@ -143,10 +133,6 @@ def __setattr__(self, key, value): attr.value = value def __deepcopy__(self, memo): - if self._lazy: - raise NotImplementedError( - "deepcopy is not implemented for lazy-loaded IDSs." - ) copy = self.__class__(self._parent, self.metadata) for child in self._children: if child in self.__dict__: @@ -168,13 +154,6 @@ def __eq__(self, other) -> bool: return False # Not equal if there is any difference return True # Equal when there are no differences - def _set_lazy_context(self, ctx: "LazyALContext") -> None: - """Called by DBEntry during a lazy get/get_slice. - - Set the context that we can use for retrieving our children. - """ - self._lazy_context = ctx - @property def _dd_parent(self) -> IDSBase: if self.metadata.data_type is IDSDataType.STRUCT_ARRAY: @@ -184,10 +163,6 @@ def _dd_parent(self) -> IDSBase: @property def has_value(self) -> bool: """True if any of the children has a non-default value""" - if self._lazy: - raise NotImplementedError( - "`has_value` is not implemented for lazy-loaded structures." - ) for _ in self.iter_nonempty_(): return True return False @@ -247,22 +222,10 @@ def iter_nonempty_(self, *, accept_lazy=False) -> Generator[IDSBase, None, None] lazy-loaded IDS. Non-empty nodes that have not been loaded from the backend are not iterated over. See detailed explanation above. """ - if self._lazy and not accept_lazy: - raise RuntimeError( - "Iterating over non-empty nodes of a lazy loaded IDS will skip nodes " - "that are not loaded. Set accept_lazy=True to continue. " - "See the documentation for more information: " - "https://imas-python.readthedocs.io/en/latest" - "/generated/imas.ids_structure." - "IDSStructure.html#imas.ids_structure.IDSStructure.iter_nonempty_" - ) for child in self._children: - if child in self.__dict__: - child_node = getattr(self, child) - if ( # IDSStructure.has_value is not implemented when lazy-loaded: - self._lazy and isinstance(child_node, IDSStructure) - ) or child_node.has_value: - yield child_node + child_node = self.__dict__.get(child, None) + if child_node is not None and child_node.has_value: + yield child_node def __iter__(self): """Iterate over this structure's children""" @@ -329,3 +292,58 @@ def _xxhash(self) -> bytes: hsh.update(child._xxhash()) return hsh.digest() + + +class LazyIDSStructure(IDSStructure): + _lazy_context: Optional["LazyALContext"] + _lazy = True + + def __init__(self, parent, metadata): + super().__init__(parent, metadata) + # Bypass setattr logic: + self.__dict__["_lazy_context"] = None + + def __getattr__(self, name): + if name not in self._children: + raise AttributeError(f"'{self!r}' has no attribute '{name}'") + # Create child node + child_meta = self._children[name] + child = child_meta._lazy_node_type(self, child_meta) + if self._lazy_context is not None: # lazy load the child + self._lazy_context.get_child(child) + self.__dict__[name] = child # bypass setattr logic + return child + + def __deepcopy__(self, memo): + raise NotImplementedError("deepcopy is not implemented for lazy-loaded IDSs.") + + def _set_lazy_context(self, ctx: "LazyALContext") -> None: + """Called by DBEntry during a lazy get/get_slice. + + Set the context that we can use for retrieving our children. + """ + self._lazy_context = ctx + + @property + def has_value(self) -> bool: + """True if any of the children has a non-default value""" + raise NotImplementedError( + "`has_value` is not implemented for lazy-loaded structures." + ) + + def iter_nonempty_(self, *, accept_lazy=False): + if not accept_lazy: + raise RuntimeError( + "Iterating over non-empty nodes of a lazy loaded IDS will skip nodes " + "that are not loaded. Set accept_lazy=True to continue. " + "See the documentation for more information: " + "https://imas-python.readthedocs.io/en/latest" + "/generated/imas.ids_structure." + "IDSStructure.html#imas.ids_structure.IDSStructure.iter_nonempty_" + ) + for child in self._children: + child_node = self.__dict__.get(child, None) + if child_node is not None: + # LazyIDSStructure.has_value is not implemented: + if isinstance(child_node, LazyIDSStructure) or child_node.has_value: + yield child_node diff --git a/imas/ids_toplevel.py b/imas/ids_toplevel.py index 15ae097..d50ffd4 100644 --- a/imas/ids_toplevel.py +++ b/imas/ids_toplevel.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Represents a Top-level IDS (like ``core_profiles``, ``equilibrium``, etc) -""" +"""Represents a Top-level IDS (like ``core_profiles``, ``equilibrium``, etc)""" import logging import os @@ -27,7 +26,7 @@ needs_imas, ) from imas.ids_metadata import IDSMetadata, IDSType, get_toplevel_metadata -from imas.ids_structure import IDSStructure +from imas.ids_structure import IDSStructure, LazyIDSStructure if TYPE_CHECKING: from imas.db_entry import DBEntry @@ -71,16 +70,15 @@ class IDSToplevel(IDSStructure): __doc__ = IDSDoc(__doc__) _path = "" # Path to ourselves without the IDS name and slashes + _lazy = False - def __init__(self, parent: "IDSFactory", structure_xml, lazy=False): - """Save backend_version and backend_xml and build translation layer. + def __init__(self, parent: "IDSFactory", structure_xml): + """Initialize toplevel IDS. Args: parent: Parent of ``self``. structure_xml: XML structure that defines this IDS toplevel. - lazy: Whether this toplevel is used for a lazy-loaded get() or get_slice() """ - self._lazy = lazy # structure_xml might be an IDSMetadata already when initializing from __copy__ # or __deepcopy__ if isinstance(structure_xml, IDSMetadata): @@ -89,11 +87,6 @@ def __init__(self, parent: "IDSFactory", structure_xml, lazy=False): metadata = get_toplevel_metadata(structure_xml) super().__init__(parent, metadata) - def __deepcopy__(self, memo): - copy = super().__deepcopy__(memo) - copy._lazy = self._lazy - return copy - @property def _dd_version(self) -> str: return self._version @@ -379,3 +372,28 @@ def _toplevel(self) -> "IDSToplevel": """Return ourselves""" # Used to cut off recursive call return self + + +class LazyIDSToplevel(LazyIDSStructure, IDSToplevel): + """Represents a lazy loaded IDS.""" + + # A note on multi-inheritance an MRO (method resolution order) and why we do not + # need to implement anything in this class: + # + # The method resolution order for this class (accessible with __mro__) is: + # - LazyIDSToplevel + # - LazyIDSStructure + # - IDSToplevel + # - IDSStructure + # - IDSBase + # - object + # + # Any class method or attribute is looked up according to the MRO. Some examples: + # - _lazy is defined on both LazyIDSStructure (True) and IDSToplevel (False). Since + # LazyIDSStructure is higher in the MRO, LazyIDSToplevel._lazy is True. + # - _validate is defined on both IDSToplevel and IDSStructure. Since IDSToplevel is + # higher in the MRO, LazyIDSToplevel._validate refers to the method from + # IDSToplevel. + # + # More details on MRO can be found in the Python docs: + # https://docs.python.org/3/howto/mro.html