Merged
30 changes: 29 additions & 1 deletion docs/source/netcdf.rst
@@ -11,7 +11,7 @@ IMAS netCDF files

IMAS-Python supports reading IDSs from and writing IDSs to IMAS netCDF files. This
feature is currently in alpha status, and its functionality may change in
-upcoming minor releases of IMAS-Python.
+upcoming (minor) releases of IMAS-Python.

A detailed description of the IMAS netCDF format and conventions can be found on
the :ref:`IMAS conventions for the netCDF data format` page.
@@ -42,6 +42,34 @@ will be used for :py:meth:`~imas.db_entry.DBEntry.get` and
imas.util.print_tree(cp2)


+Implemented features of a netCDF ``DBEntry``
+--------------------------------------------
+
+A netCDF ``DBEntry`` does not implement all features that are supported by
+``imas_core``. The following table gives an overview of the features supported
+by a ``DBEntry`` backed by ``imas_core`` and by ``netCDF`` respectively; a
+short usage sketch follows the table.

+.. list-table::
+   :header-rows: 1
+
+   * - Feature
+     - ``imas_core``
+     - ``netCDF``
+   * - :ref:`Lazy loading`
+     - Yes
+     - Yes
+   * - :ref:`Automatic conversion between DD versions <Conversion of IDSs between DD versions>`
+     - When reading and writing
+     - When reading
+   * - ``get_slice`` / ``put_slice``
+     - Yes
+     - Not implemented
+   * - ``get_sample``
+     - Yes (requires ``imas_core >= 5.4.0``)
+     - Not implemented
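
For illustration, a minimal sketch of what the table means in practice
(assuming an existing IMAS netCDF file ``core_profiles.nc`` that contains a
``core_profiles`` IDS; the file name is illustrative):

    import imas
    from imas.ids_defs import CLOSEST_INTERP

    with imas.DBEntry("core_profiles.nc", "r") as entry:
        # Lazy loading is supported for netCDF entries:
        cp = entry.get("core_profiles", lazy=True)
        # get_slice / put_slice / get_sample are not: per the table above,
        # the following raises NotImplementedError.
        entry.get_slice("core_profiles", 0.5, CLOSEST_INTERP)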


Using IMAS netCDF files with 3rd-party tools
--------------------------------------------

4 changes: 4 additions & 0 deletions imas/backends/imas_core/al_context.py
@@ -10,6 +10,7 @@

import numpy

+import imas
from imas.backends.imas_core.imas_interface import ll_interface
from imas.exception import LowlevelError
from imas.ids_defs import (
@@ -280,6 +281,9 @@ def __init__(
self.context = None
"""Potential weak reference to opened context."""

+    def get_child(self, child):
+        imas.backends.imas_core.db_entry_helpers._get_child(child, self)
+
def get_context(self) -> ALContext:
"""Create and yield the actual ALContext."""
if self.dbentry._db_ctx is not self.dbentry_ctx:
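
The new ``get_child`` gives ``LazyALContext`` the same entry point that the
netCDF ``LazyContext`` (added in ``nc2ids.py`` below) exposes, so a lazily
loaded IDS node can request its data without knowing which backend serves it.
A sketch of the shared, duck-typed interface (the ``Protocol`` is hypothetical,
for illustration only):

    from typing import Protocol

    from imas.ids_base import IDSBase

    class LazyLoadContext(Protocol):
        """What a lazily loaded IDS node expects from its context."""

        def get_child(self, child: IDSBase) -> None:
            """Load the data (or nested context) for `child` from storage."""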
10 changes: 3 additions & 7 deletions imas/backends/imas_core/db_entry_helpers.py
@@ -22,7 +22,7 @@ def get_children(
structure: IDSStructure,
ctx: ALContext,
time_mode: int,
-    nbc_map: Optional[NBCPathMap],
+    nbc_map: Optional["NBCPathMap"],
) -> None:
"""Recursively get all children of an IDSStructure."""
# NOTE: changes in this method must be propagated to _get_child and vice versa
@@ -77,15 +77,11 @@ def get_children(
getattr(structure, name)._IDSPrimitive__value = data


-def _get_child(child: IDSBase, ctx: Optional[LazyALContext]):
+def _get_child(child: IDSBase, ctx: LazyALContext):
"""Get a single child when required (lazy loading)."""
    # NOTE: changes in this method must be propagated to get_children and vice versa
# Performance: this method is specialized for the lazy get

-    # ctx can be None when the parent structure does not exist in the on-disk DD version
-    if ctx is None:
-        return  # There is no data to be loaded

time_mode = ctx.time_mode
if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic:
return # skip dynamic (time-dependent) nodes
@@ -148,7 +144,7 @@ def put_children(
ctx: ALContext,
time_mode: int,
is_slice: bool,
-    nbc_map: Optional[NBCPathMap],
+    nbc_map: Optional["NBCPathMap"],
verify_maxoccur: bool,
) -> None:
"""Recursively put all children of an IDSStructure"""
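
The quotes around ``Optional["NBCPathMap"]`` turn the annotations into forward
references, so the name only needs to be importable for static type checkers.
A typical pattern (a sketch; not necessarily this module's exact import
layout):

    from typing import TYPE_CHECKING, Optional

    if TYPE_CHECKING:
        # Only imported during type checking, e.g. to avoid a runtime
        # import cycle with imas.ids_convert:
        from imas.ids_convert import NBCPathMap

    def example(nbc_map: Optional["NBCPathMap"]) -> bool:
        # Hypothetical helper, for illustration only.
        return nbc_map is not None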
2 changes: 1 addition & 1 deletion imas/backends/imas_core/imas_interface.py
@@ -32,7 +32,7 @@
imasdef = None
lowlevel = None
logger.critical(
"Could not import 'al_core': %s. Some functionality is not available.",
"Could not import 'imas_core': %s. Some functionality is not available.",
exc,
)

25 changes: 13 additions & 12 deletions imas/backends/netcdf/db_entry_nc.py
@@ -11,7 +11,7 @@
from imas.backends.netcdf.ids2nc import IDS2NC
from imas.backends.netcdf.nc2ids import NC2IDS
from imas.exception import DataEntryException, InvalidNetCDFEntry
-from imas.ids_convert import NBCPathMap, convert_ids
+from imas.ids_convert import NBCPathMap, dd_version_map_from_factories
from imas.ids_factory import IDSFactory
from imas.ids_toplevel import IDSToplevel

@@ -108,10 +108,6 @@ def get(
else:
func = "get_sample"
raise NotImplementedError(f"`{func}` is not available for netCDF files.")
-        if lazy:
-            raise NotImplementedError(
-                "Lazy loading is not implemented for netCDF files."
-            )

# Check if the IDS/occurrence exists, and obtain the group it is stored in
try:
@@ -123,14 +119,19 @@

# Load data into the destination IDS
if self._ds_factory.dd_version == destination._dd_version:
-            NC2IDS(group, destination).run()
+            NC2IDS(group, destination, destination.metadata, None).run(lazy)
else:
-            # FIXME: implement automatic conversion using nbc_map
-            # As a work-around: do an explicit conversion, but automatic conversion
-            # will also be needed to implement lazy loading.
-            ids = self._ds_factory.new(ids_name)
-            NC2IDS(group, ids).run()
-            convert_ids(ids, None, target=destination)
+            # Construct the relevant NBCPathMap: the one we get from DBEntry has
+            # the reverse mapping from what we need. The imas_core logic maps from
+            # in-memory to on-disk paths, whereas we take what is on disk and map
+            # it to in-memory.
+            ddmap, source_is_older = dd_version_map_from_factories(
+                ids_name, self._ds_factory, self._factory
+            )
+            nbc_map = ddmap.old_to_new if source_is_older else ddmap.new_to_old
+            NC2IDS(
+                group, destination, self._ds_factory.new(ids_name).metadata, nbc_map
+            ).run(lazy)

return destination
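
The net effect: reads now transparently convert between DD versions, where the
previous code did an explicit ``convert_ids`` after loading. A minimal sketch
of a cross-version round trip (the file name and DD versions are illustrative,
and both versions must be available in the installed Data Dictionary):

    import imas

    # Write a core_profiles IDS with one DD version ...
    with imas.DBEntry("cp.nc", "w", dd_version="3.42.0") as entry:
        cp = entry.factory.core_profiles()
        cp.ids_properties.homogeneous_time = 2  # time-independent IDS
        cp.ids_properties.comment = "written with DD 3.42.0"
        entry.put(cp)

    # ... and read it back with another; the on-disk data is mapped to the
    # in-memory DD version via the NBCPathMap constructed above:
    with imas.DBEntry("cp.nc", "r", dd_version="4.0.0") as entry:
        cp = entry.get("core_profiles")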

149 changes: 134 additions & 15 deletions imas/backends/netcdf/nc2ids.py
@@ -3,11 +3,13 @@
from typing import Iterator, List, Optional, Tuple

import netCDF4
+import numpy as np

from imas.backends.netcdf import ids2nc
from imas.backends.netcdf.nc_metadata import NCMetadata
from imas.exception import InvalidNetCDFEntry
from imas.ids_base import IDSBase
+from imas.ids_convert import NBCPathMap
from imas.ids_data_type import IDSDataType
from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
from imas.ids_metadata import IDSMetadata
@@ -70,22 +72,37 @@ def _tree_iter(
class NC2IDS:
"""Class responsible for reading an IDS from a NetCDF group."""

-    def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
+    def __init__(
+        self,
+        group: netCDF4.Group,
+        ids: IDSToplevel,
+        ids_metadata: IDSMetadata,
+        nbc_map: Optional[NBCPathMap],
+    ) -> None:
"""Initialize NC2IDS converter.

Args:
group: NetCDF group that stores the IDS data.
ids: Corresponding IDS toplevel to store the data in.
+            ids_metadata: Metadata corresponding to the DD version that the data is
+                stored in.
+            nbc_map: Path map for implicit DD conversions.
"""
self.group = group
"""NetCDF Group that the IDS is stored in."""
self.ids = ids
"""IDS to store the data in."""
+        self.ids_metadata = ids_metadata
+        """Metadata of the IDS in the DD version that the data is stored in."""
+        self.nbc_map = nbc_map
+        """Path map for implicit DD conversions."""

-        self.ncmeta = NCMetadata(ids.metadata)
+        self.ncmeta = NCMetadata(ids_metadata)
"""NetCDF related metadata."""
self.variables = list(group.variables)
"""List of variable names stored in the netCDF group."""

+        self._lazy_map = {}
# Don't use masked arrays: they're slow and we'll handle most of the unset
# values through the `:shape` arrays
self.group.set_auto_mask(False)
@@ -99,31 +116,60 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
"Mandatory variable `ids_properties.homogeneous_time` does not exist."
)
var = group["ids_properties.homogeneous_time"]
-        self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata)
+        self._validate_variable(var, ids.metadata["ids_properties/homogeneous_time"])
if var[()] not in [0, 1, 2]:
raise InvalidNetCDFEntry(
f"Invalid value for ids_properties.homogeneous_time: {var[()]}. "
"Was expecting: 0, 1 or 2."
)
self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS

-    def run(self) -> None:
+    def run(self, lazy: bool) -> None:
"""Load the data from the netCDF group into the IDS."""
self.variables.sort()
self.validate_variables()
+        if lazy:
+            self.ids._set_lazy_context(LazyContext(self))
for var_name in self.variables:
if var_name.endswith(":shape"):
continue
-            metadata = self.ids.metadata[var_name]
+            metadata = self.ids_metadata[var_name]

if metadata.data_type is IDSDataType.STRUCTURE:
continue # This only contains DD metadata we already know

+            # Handle implicit DD version conversion
+            if self.nbc_map is None:
+                target_metadata = metadata  # no conversion
+            elif metadata.path_string in self.nbc_map:
+                new_path = self.nbc_map.path[metadata.path_string]
+                if new_path is None:
+                    logging.info(
+                        "Not loading data for %s: no equivalent data structure exists "
+                        "in the target Data Dictionary version.",
+                        metadata.path_string,
+                    )
+                    continue
+                target_metadata = self.ids.metadata[new_path]
+            elif metadata.path_string in self.nbc_map.type_change:
+                logging.info(
+                    "Not loading data for %s: cannot handle type changes when "
+                    "implicitly converting data to the target Data Dictionary version.",
+                    metadata.path_string,
+                )
+                continue
+            else:
+                target_metadata = metadata  # no conversion required

var = self.group[var_name]
+            if lazy:
+                self._lazy_map[target_metadata.path_string] = var
+                continue

if metadata.data_type is IDSDataType.STRUCT_ARRAY:
if "sparse" in var.ncattrs():
shapes = self.group[var_name + ":shape"][()]
-                    for index, node in tree_iter(self.ids, metadata):
+                    for index, node in tree_iter(self.ids, target_metadata):
node.resize(shapes[index][0])

else:
@@ -132,7 +178,7 @@ def run(self) -> None:
metadata.path_string, self.homogeneous_time
)[-1]
size = self.group.dimensions[dim].size
-                    for _, node in tree_iter(self.ids, metadata):
+                    for _, node in tree_iter(self.ids, target_metadata):
node.resize(size)

continue
@@ -144,23 +190,30 @@ def run(self) -> None:
if "sparse" in var.ncattrs():
if metadata.ndim:
shapes = self.group[var_name + ":shape"][()]
-                for index, node in tree_iter(self.ids, metadata):
+                for index, node in tree_iter(self.ids, target_metadata):
shape = shapes[index]
if shape.all():
-                            node.value = data[index + tuple(map(slice, shapes[index]))]
+                            # NOTE: bypassing IDSPrimitive.value.setter logic
+                            node._IDSPrimitive__value = data[
+                                index + tuple(map(slice, shape))
+                            ]
else:
-                    for index, node in tree_iter(self.ids, metadata):
+                    for index, node in tree_iter(self.ids, target_metadata):
value = data[index]
if value != getattr(var, "_FillValue", None):
-                            node.value = data[index]
+                            # NOTE: bypassing IDSPrimitive.value.setter logic
+                            node._IDSPrimitive__value = value

elif metadata.path_string not in self.ncmeta.aos:
# Shortcut for assigning untensorized data
-                self.ids[metadata.path] = data
+                # Note: var[()] can return 0D numpy arrays. Instead of handling this
+                # here, we'll let IDSPrimitive.value.setter take care of it:
+                self.ids[target_metadata.path].value = data

else:
-                for index, node in tree_iter(self.ids, metadata):
-                    node.value = data[index]
+                for index, node in tree_iter(self.ids, target_metadata):
+                    # NOTE: bypassing IDSPrimitive.value.setter logic
+                    node._IDSPrimitive__value = data[index]

def validate_variables(self) -> None:
"""Validate that all variables in the netCDF Group exist and match the DD."""
@@ -194,7 +247,7 @@ def validate_variables(self) -> None:
# Check that the DD defines this variable, and validate its metadata
var = self.group[var_name]
try:
-                metadata = self.ids.metadata[var_name]
+                metadata = self.ids_metadata[var_name]
except KeyError:
raise InvalidNetCDFEntry(
f"Invalid variable {var_name}: no such variable exists in the "
@@ -300,3 +353,69 @@ def _validate_sparsity(
raise variable_error(
shape_var, "dtype", shape_var.dtype, "any integer type"
)


+class LazyContext:
+    def __init__(self, nc2ids, index=()):
+        self.nc2ids = nc2ids
+        self.index = index
+
+    def get_child(self, child):
+        metadata = child.metadata
+        path = metadata.path_string
+        data_type = metadata.data_type
+        nc2ids = self.nc2ids
+        var = nc2ids._lazy_map.get(path)
+
+        if data_type is IDSDataType.STRUCT_ARRAY:
+            # Determine size of the aos
+            if var is None:
+                size = 0
+            elif "sparse" in var.ncattrs():
+                size = nc2ids.group[var.name + ":shape"][self.index][0]
+            else:
+                # FIXME: extract dimension name from nc file?
+                dim = nc2ids.ncmeta.get_dimensions(
+                    metadata.path_string, nc2ids.homogeneous_time
+                )[-1]
+                size = nc2ids.group.dimensions[dim].size
+
+            child._set_lazy_context(LazyArrayStructContext(nc2ids, self.index, size))
+
+        elif data_type is IDSDataType.STRUCTURE:
+            child._set_lazy_context(self)
+
+        elif var is not None:  # Data elements
+            value = None
+            if "sparse" in var.ncattrs():
+                if metadata.ndim:
+                    shape_var = nc2ids.group[var.name + ":shape"]
+                    shape = shape_var[self.index]
+                    if shape.all():
+                        value = var[self.index + tuple(map(slice, shape))]
+                else:
+                    value = var[self.index]
+                    if value == getattr(var, "_FillValue", None):
+                        value = None  # Skip setting
+            else:
+                value = var[self.index]
+
+            if value is not None:
+                if isinstance(value, np.ndarray):
+                    # Convert the numpy array to a read-only view
+                    value = value.view()
+                    value.flags.writeable = False
+                # NOTE: bypassing IDSPrimitive.value.setter logic
+                child._IDSPrimitive__value = value
+
+
+class LazyArrayStructContext(LazyContext):
+    def __init__(self, nc2ids, index, size):
+        super().__init__(nc2ids, index)
+        self.size = size
+
+    def get_context(self):
+        return self  # IDSStructArray expects to get something with a size attribute
+
+    def iterate_to_index(self, index: int) -> LazyContext:
+        return LazyContext(self.nc2ids, self.index + (index,))
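
To make the lazy control flow concrete: with ``lazy=True``, ``run`` above only
fills ``_lazy_map`` and attaches a ``LazyContext`` to the toplevel IDS;
variables are read from the netCDF file when a node is first accessed. A
sketch (the file name is illustrative):

    import imas

    with imas.DBEntry("core_profiles.nc", "r") as entry:
        cp = entry.get("core_profiles", lazy=True)
        # Accessing an array of structures goes through LazyContext.get_child,
        # which attaches a LazyArrayStructContext carrying the AoS size:
        print(len(cp.profiles_1d))
        # Indexing it calls iterate_to_index; reading a leaf then pulls just
        # the corresponding netCDF variable (as a read-only array view):
        t_e = cp.profiles_1d[0].electrons.temperature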