Merged
30 changes: 29 additions & 1 deletion docs/source/netcdf.rst
@@ -11,7 +11,7 @@ IMAS netCDF files

IMAS-Python supports reading IDSs from and writing IDSs to IMAS netCDF files. This
feature is currently in alpha status, and its functionality may change in
-upcoming minor releases of IMAS-Python.
+upcoming (minor) releases of IMAS-Python.

A detailed description of the IMAS netCDF format and conventions can be found on
the :ref:`IMAS conventions for the netCDF data format` page.
@@ -42,6 +42,34 @@ will be used for :py:meth:`~imas.db_entry.DBEntry.get` and
imas.util.print_tree(cp2)


+Implemented features of a netCDF ``DBEntry``
+--------------------------------------------
+
+A netCDF ``DBEntry`` does not implement all features that are supported by
+``imas_core``. The following table gives an overview of the features supported
+by a ``DBEntry`` backed by ``imas_core`` and by ``netCDF`` respectively; a
+short usage sketch follows the table.

+.. list-table::
+   :header-rows: 1
+
+   * - Feature
+     - ``imas_core``
+     - ``netCDF``
+   * - :ref:`Lazy loading`
+     - Yes
+     - Yes
+   * - :ref:`Automatic conversion between DD versions <Conversion of IDSs between DD versions>`
+     - When reading and writing
+     - When reading
+   * - ``get_slice`` / ``put_slice``
+     - Yes
+     - Not implemented
+   * - ``get_sample``
+     - Yes (requires ``imas_core >= 5.4.0``)
+     - Not implemented
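
For illustration, a minimal sketch of what the table means in practice
(assuming an existing IMAS netCDF file ``core_profiles.nc`` that contains a
``core_profiles`` IDS; the file name is illustrative):

    import imas
    from imas.ids_defs import CLOSEST_INTERP

    with imas.DBEntry("core_profiles.nc", "r") as entry:
        # Lazy loading is supported for netCDF entries:
        cp = entry.get("core_profiles", lazy=True)
        # get_slice / put_slice / get_sample are not: per the table above,
        # the following raises NotImplementedError.
        entry.get_slice("core_profiles", 0.5, CLOSEST_INTERP)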


Using IMAS netCDF files with 3rd-party tools
--------------------------------------------

4 changes: 4 additions & 0 deletions imas/backends/imas_core/al_context.py
@@ -10,6 +10,7 @@

import numpy

+import imas
from imas.backends.imas_core.imas_interface import ll_interface
from imas.exception import LowlevelError
from imas.ids_defs import (
@@ -280,6 +281,9 @@ def __init__(
self.context = None
"""Potential weak reference to opened context."""

+    def get_child(self, child):
+        imas.backends.imas_core.db_entry_helpers._get_child(child, self)
+
def get_context(self) -> ALContext:
"""Create and yield the actual ALContext."""
if self.dbentry._db_ctx is not self.dbentry_ctx:
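
The new ``get_child`` gives ``LazyALContext`` the same entry point that the
netCDF ``LazyContext`` (added in ``nc2ids.py`` below) exposes, so a lazily
loaded IDS node can request its data without knowing which backend serves it.
A sketch of the shared, duck-typed interface (the ``Protocol`` is hypothetical,
for illustration only):

    from typing import Protocol

    from imas.ids_base import IDSBase

    class LazyLoadContext(Protocol):
        """What a lazily loaded IDS node expects from its context."""

        def get_child(self, child: IDSBase) -> None:
            """Load the data (or nested context) for `child` from storage."""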
10 changes: 3 additions & 7 deletions imas/backends/imas_core/db_entry_helpers.py
@@ -22,7 +22,7 @@ def get_children(
structure: IDSStructure,
ctx: ALContext,
time_mode: int,
-    nbc_map: Optional[NBCPathMap],
+    nbc_map: Optional["NBCPathMap"],
) -> None:
"""Recursively get all children of an IDSStructure."""
# NOTE: changes in this method must be propagated to _get_child and vice versa
@@ -77,15 +77,11 @@ def get_children(
getattr(structure, name)._IDSPrimitive__value = data


-def _get_child(child: IDSBase, ctx: Optional[LazyALContext]):
+def _get_child(child: IDSBase, ctx: LazyALContext):
"""Get a single child when required (lazy loading)."""
    # NOTE: changes in this method must be propagated to get_children and vice versa
# Performance: this method is specialized for the lazy get

-    # ctx can be None when the parent structure does not exist in the on-disk DD version
-    if ctx is None:
-        return  # There is no data to be loaded

time_mode = ctx.time_mode
if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic:
return # skip dynamic (time-dependent) nodes
@@ -148,7 +144,7 @@ def put_children(
ctx: ALContext,
time_mode: int,
is_slice: bool,
-    nbc_map: Optional[NBCPathMap],
+    nbc_map: Optional["NBCPathMap"],
verify_maxoccur: bool,
) -> None:
"""Recursively put all children of an IDSStructure"""
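
The quotes around ``Optional["NBCPathMap"]`` turn the annotations into forward
references, so the name only needs to be importable for static type checkers.
A typical pattern (a sketch; not necessarily this module's exact import
layout):

    from typing import TYPE_CHECKING, Optional

    if TYPE_CHECKING:
        # Only imported during type checking, e.g. to avoid a runtime
        # import cycle with imas.ids_convert:
        from imas.ids_convert import NBCPathMap

    def example(nbc_map: Optional["NBCPathMap"]) -> bool:
        # Hypothetical helper, for illustration only.
        return nbc_map is not None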
2 changes: 1 addition & 1 deletion imas/backends/imas_core/imas_interface.py
@@ -32,7 +32,7 @@
imasdef = None
lowlevel = None
logger.critical(
"Could not import 'al_core': %s. Some functionality is not available.",
"Could not import 'imas_core': %s. Some functionality is not available.",
exc,
)

25 changes: 13 additions & 12 deletions imas/backends/netcdf/db_entry_nc.py
@@ -11,7 +11,7 @@
from imas.backends.netcdf.ids2nc import IDS2NC
from imas.backends.netcdf.nc2ids import NC2IDS
from imas.exception import DataEntryException, InvalidNetCDFEntry
-from imas.ids_convert import NBCPathMap, convert_ids
+from imas.ids_convert import NBCPathMap, dd_version_map_from_factories
from imas.ids_factory import IDSFactory
from imas.ids_toplevel import IDSToplevel

@@ -108,10 +108,6 @@ def get(
else:
func = "get_sample"
raise NotImplementedError(f"`{func}` is not available for netCDF files.")
-        if lazy:
-            raise NotImplementedError(
-                "Lazy loading is not implemented for netCDF files."
-            )

# Check if the IDS/occurrence exists, and obtain the group it is stored in
try:
@@ -123,14 +119,19 @@

# Load data into the destination IDS
if self._ds_factory.dd_version == destination._dd_version:
-            NC2IDS(group, destination).run()
+            NC2IDS(group, destination, destination.metadata, None).run(lazy)
else:
-            # FIXME: implement automatic conversion using nbc_map
-            # As a work-around: do an explicit conversion, but automatic conversion
-            # will also be needed to implement lazy loading.
-            ids = self._ds_factory.new(ids_name)
-            NC2IDS(group, ids).run()
-            convert_ids(ids, None, target=destination)
+            # Construct the relevant NBCPathMap: the one we get from DBEntry has
+            # the reverse mapping from what we need. The imas_core logic maps from
+            # in-memory to on-disk paths, whereas we take what is on disk and map
+            # it to in-memory.
+            ddmap, source_is_older = dd_version_map_from_factories(
+                ids_name, self._ds_factory, self._factory
+            )
+            nbc_map = ddmap.old_to_new if source_is_older else ddmap.new_to_old
+            NC2IDS(
+                group, destination, self._ds_factory.new(ids_name).metadata, nbc_map
+            ).run(lazy)

return destination
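
The net effect: reads now transparently convert between DD versions, where the
previous code did an explicit ``convert_ids`` after loading. A minimal sketch
of a cross-version round trip (the file name and DD versions are illustrative,
and both versions must be available in the installed Data Dictionary):

    import imas

    # Write a core_profiles IDS with one DD version ...
    with imas.DBEntry("cp.nc", "w", dd_version="3.42.0") as entry:
        cp = entry.factory.core_profiles()
        cp.ids_properties.homogeneous_time = 2  # time-independent IDS
        cp.ids_properties.comment = "written with DD 3.42.0"
        entry.put(cp)

    # ... and read it back with another; the on-disk data is mapped to the
    # in-memory DD version via the NBCPathMap constructed above:
    with imas.DBEntry("cp.nc", "r", dd_version="4.0.0") as entry:
        cp = entry.get("core_profiles")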

149 changes: 134 additions & 15 deletions imas/backends/netcdf/nc2ids.py
@@ -3,11 +3,13 @@
from typing import Iterator, List, Optional, Tuple

import netCDF4
+import numpy as np

from imas.backends.netcdf import ids2nc
from imas.backends.netcdf.nc_metadata import NCMetadata
from imas.exception import InvalidNetCDFEntry
from imas.ids_base import IDSBase
+from imas.ids_convert import NBCPathMap
from imas.ids_data_type import IDSDataType
from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
from imas.ids_metadata import IDSMetadata
@@ -70,22 +72,37 @@ def _tree_iter(
class NC2IDS:
"""Class responsible for reading an IDS from a NetCDF group."""

-    def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
+    def __init__(
+        self,
+        group: netCDF4.Group,
+        ids: IDSToplevel,
+        ids_metadata: IDSMetadata,
+        nbc_map: Optional[NBCPathMap],
+    ) -> None:
"""Initialize NC2IDS converter.

Args:
group: NetCDF group that stores the IDS data.
ids: Corresponding IDS toplevel to store the data in.
+            ids_metadata: Metadata corresponding to the DD version that the data is
+                stored in.
+            nbc_map: Path map for implicit DD conversions.
"""
self.group = group
"""NetCDF Group that the IDS is stored in."""
self.ids = ids
"""IDS to store the data in."""
+        self.ids_metadata = ids_metadata
+        """Metadata of the IDS in the DD version that the data is stored in."""
+        self.nbc_map = nbc_map
+        """Path map for implicit DD conversions."""

-        self.ncmeta = NCMetadata(ids.metadata)
+        self.ncmeta = NCMetadata(ids_metadata)
"""NetCDF related metadata."""
self.variables = list(group.variables)
"""List of variable names stored in the netCDF group."""

+        self._lazy_map = {}
# Don't use masked arrays: they're slow and we'll handle most of the unset
# values through the `:shape` arrays
self.group.set_auto_mask(False)
@@ -99,31 +116,60 @@ def __init__(self, group: netCDF4.Group, ids: IDSToplevel) -> None:
"Mandatory variable `ids_properties.homogeneous_time` does not exist."
)
var = group["ids_properties.homogeneous_time"]
-        self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata)
+        self._validate_variable(var, ids.metadata["ids_properties/homogeneous_time"])
if var[()] not in [0, 1, 2]:
raise InvalidNetCDFEntry(
f"Invalid value for ids_properties.homogeneous_time: {var[()]}. "
"Was expecting: 0, 1 or 2."
)
self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS

-    def run(self) -> None:
+    def run(self, lazy: bool) -> None:
"""Load the data from the netCDF group into the IDS."""
self.variables.sort()
self.validate_variables()
+        if lazy:
+            self.ids._set_lazy_context(LazyContext(self))
for var_name in self.variables:
if var_name.endswith(":shape"):
continue
-            metadata = self.ids.metadata[var_name]
+            metadata = self.ids_metadata[var_name]

if metadata.data_type is IDSDataType.STRUCTURE:
continue # This only contains DD metadata we already know

+            # Handle implicit DD version conversion
+            if self.nbc_map is None:
+                target_metadata = metadata  # no conversion
+            elif metadata.path_string in self.nbc_map:
+                new_path = self.nbc_map.path[metadata.path_string]
+                if new_path is None:
+                    logging.info(
+                        "Not loading data for %s: no equivalent data structure exists "
+                        "in the target Data Dictionary version.",
+                        metadata.path_string,
+                    )
+                    continue
+                target_metadata = self.ids.metadata[new_path]
+            elif metadata.path_string in self.nbc_map.type_change:
+                logging.info(
+                    "Not loading data for %s: cannot handle type changes when "
+                    "implicitly converting data to the target Data Dictionary version.",
+                    metadata.path_string,
+                )
+                continue
+            else:
+                target_metadata = metadata  # no conversion required

var = self.group[var_name]
+            if lazy:
+                self._lazy_map[target_metadata.path_string] = var
+                continue

if metadata.data_type is IDSDataType.STRUCT_ARRAY:
if "sparse" in var.ncattrs():
shapes = self.group[var_name + ":shape"][()]
-                    for index, node in tree_iter(self.ids, metadata):
+                    for index, node in tree_iter(self.ids, target_metadata):
node.resize(shapes[index][0])

else:
@@ -132,7 +178,7 @@ def run(self) -> None:
metadata.path_string, self.homogeneous_time
)[-1]
size = self.group.dimensions[dim].size
-                    for _, node in tree_iter(self.ids, metadata):
+                    for _, node in tree_iter(self.ids, target_metadata):
node.resize(size)

continue
@@ -144,23 +190,30 @@ def run(self) -> None:
if "sparse" in var.ncattrs():
if metadata.ndim:
shapes = self.group[var_name + ":shape"][()]
-                for index, node in tree_iter(self.ids, metadata):
+                for index, node in tree_iter(self.ids, target_metadata):
shape = shapes[index]
if shape.all():
-                            node.value = data[index + tuple(map(slice, shapes[index]))]
+                            # NOTE: bypassing IDSPrimitive.value.setter logic
+                            node._IDSPrimitive__value = data[
+                                index + tuple(map(slice, shape))
+                            ]
else:
-                    for index, node in tree_iter(self.ids, metadata):
+                    for index, node in tree_iter(self.ids, target_metadata):
value = data[index]
if value != getattr(var, "_FillValue", None):
-                            node.value = data[index]
+                            # NOTE: bypassing IDSPrimitive.value.setter logic
+                            node._IDSPrimitive__value = value

elif metadata.path_string not in self.ncmeta.aos:
# Shortcut for assigning untensorized data
-                self.ids[metadata.path] = data
+                # Note: var[()] can return 0D numpy arrays. Instead of handling this
+                # here, we'll let IDSPrimitive.value.setter take care of it:
+                self.ids[target_metadata.path].value = data

else:
-                for index, node in tree_iter(self.ids, metadata):
-                    node.value = data[index]
+                for index, node in tree_iter(self.ids, target_metadata):
+                    # NOTE: bypassing IDSPrimitive.value.setter logic
+                    node._IDSPrimitive__value = data[index]

def validate_variables(self) -> None:
"""Validate that all variables in the netCDF Group exist and match the DD."""
@@ -194,7 +247,7 @@ def validate_variables(self) -> None:
# Check that the DD defines this variable, and validate its metadata
var = self.group[var_name]
try:
-                metadata = self.ids.metadata[var_name]
+                metadata = self.ids_metadata[var_name]
except KeyError:
raise InvalidNetCDFEntry(
f"Invalid variable {var_name}: no such variable exists in the "
@@ -300,3 +353,69 @@ def _validate_sparsity(
raise variable_error(
shape_var, "dtype", shape_var.dtype, "any integer type"
)


+class LazyContext:
+    def __init__(self, nc2ids, index=()):
+        self.nc2ids = nc2ids
+        self.index = index
+
+    def get_child(self, child):
+        metadata = child.metadata
+        path = metadata.path_string
+        data_type = metadata.data_type
+        nc2ids = self.nc2ids
+        var = nc2ids._lazy_map.get(path)
+
+        if data_type is IDSDataType.STRUCT_ARRAY:
+            # Determine size of the aos
+            if var is None:
+                size = 0
+            elif "sparse" in var.ncattrs():
+                size = nc2ids.group[var.name + ":shape"][self.index][0]
+            else:
+                # FIXME: extract dimension name from nc file?
+                dim = nc2ids.ncmeta.get_dimensions(
+                    metadata.path_string, nc2ids.homogeneous_time
+                )[-1]
+                size = nc2ids.group.dimensions[dim].size
+
+            child._set_lazy_context(LazyArrayStructContext(nc2ids, self.index, size))
+
+        elif data_type is IDSDataType.STRUCTURE:
+            child._set_lazy_context(self)
+
+        elif var is not None:  # Data elements
+            value = None
+            if "sparse" in var.ncattrs():
+                if metadata.ndim:
+                    shape_var = nc2ids.group[var.name + ":shape"]
+                    shape = shape_var[self.index]
+                    if shape.all():
+                        value = var[self.index + tuple(map(slice, shape))]
+                else:
+                    value = var[self.index]
+                    if value == getattr(var, "_FillValue", None):
+                        value = None  # Skip setting
+            else:
+                value = var[self.index]
+
+            if value is not None:
+                if isinstance(value, np.ndarray):
+                    # Convert the numpy array to a read-only view
+                    value = value.view()
+                    value.flags.writeable = False
+                # NOTE: bypassing IDSPrimitive.value.setter logic
+                child._IDSPrimitive__value = value
+
+
+class LazyArrayStructContext(LazyContext):
+    def __init__(self, nc2ids, index, size):
+        super().__init__(nc2ids, index)
+        self.size = size
+
+    def get_context(self):
+        return self  # IDSStructArray expects to get something with a size attribute
+
+    def iterate_to_index(self, index: int) -> LazyContext:
+        return LazyContext(self.nc2ids, self.index + (index,))
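
To make the lazy control flow concrete: with ``lazy=True``, ``run`` above only
fills ``_lazy_map`` and attaches a ``LazyContext`` to the toplevel IDS;
variables are read from the netCDF file when a node is first accessed. A
sketch (the file name is illustrative):

    import imas

    with imas.DBEntry("core_profiles.nc", "r") as entry:
        cp = entry.get("core_profiles", lazy=True)
        # Accessing an array of structures goes through LazyContext.get_child,
        # which attaches a LazyArrayStructContext carrying the AoS size:
        print(len(cp.profiles_1d))
        # Indexing it calls iterate_to_index; reading a leaf then pulls just
        # the corresponding netCDF variable (as a read-only array view):
        t_e = cp.profiles_1d[0].electrons.temperature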