Skip to content

ENH: enable H5NetCDFStore to work with already open h5netcdf.File a… #3618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ Breaking changes

New Features
~~~~~~~~~~~~
- Support using an existing, opened h5netcdf ``File`` with
:py:class:`~xarray.backends.H5NetCDFStore`. This permits creating an
:py:class:`~xarray.Dataset` from an h5netcdf ``File`` that has been opened
using other means (:issue:`3618`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Implement :py:func:`median` and :py:func:`nanmedian` for dask arrays. This works by rechunking
to a single chunk along all reduction axes. (:issue:`2999`).
By `Deepak Cherian <https://github.com/dcherian>`_.
Expand Down
6 changes: 3 additions & 3 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def maybe_decode_store(store, lock=False):
elif engine == "pydap":
store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs)
elif engine == "h5netcdf":
store = backends.H5NetCDFStore(
store = backends.H5NetCDFStore.open(
filename_or_obj, group=group, lock=lock, **backend_kwargs
)
elif engine == "pynio":
Expand All @@ -527,7 +527,7 @@ def maybe_decode_store(store, lock=False):
if engine == "scipy":
store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
elif engine == "h5netcdf":
store = backends.H5NetCDFStore(
store = backends.H5NetCDFStore.open(
filename_or_obj, group=group, lock=lock, **backend_kwargs
)

Expand Down Expand Up @@ -981,7 +981,7 @@ def open_mfdataset(
WRITEABLE_STORES: Dict[str, Callable] = {
"netcdf4": backends.NetCDF4DataStore.open,
"scipy": backends.ScipyDataStore,
"h5netcdf": backends.H5NetCDFStore,
"h5netcdf": backends.H5NetCDFStore.open,
}


Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/common.py
Original file line number Diff line number Diff line change
def find_root_and_group(ds):
    """Find the root and group name of a netCDF4/h5netcdf dataset.

    Walks ``ds.parent`` links up to the root file object and records each
    group's own name along the way.

    Parameters
    ----------
    ds : netCDF4.Group / h5netcdf.Group-like
        Any object exposing ``parent`` (``None`` at the root) and ``name``.

    Returns
    -------
    tuple
        ``(root, group)`` where ``root`` is the top-level dataset/file and
        ``group`` is the absolute group path, e.g. ``"/g/sub"`` (``"/"`` for
        the root itself).
    """
    hierarchy = ()
    while ds.parent is not None:
        # h5netcdf reports absolute names such as "/g/sub"; keep only the
        # final path component so the path is not duplicated when joined.
        # (netCDF4 reports bare names, for which split is a no-op.)
        hierarchy = (ds.name.split("/")[-1],) + hierarchy
        ds = ds.parent
    group = "/" + "/".join(hierarchy)
    return ds, group
Expand Down
61 changes: 46 additions & 15 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from .. import Variable
from ..core import indexing
from ..core.utils import FrozenDict
from .common import WritableCFDataStore
from .file_manager import CachingFileManager
from ..core.utils import FrozenDict, is_remote_uri
from .common import WritableCFDataStore, find_root_and_group
from .file_manager import CachingFileManager, DummyFileManager
from .locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock
from .netCDF4_ import (
BaseNetCDF4Array,
Expand Down Expand Up @@ -69,8 +69,47 @@ class H5NetCDFStore(WritableCFDataStore):
"""Store for reading and writing data via h5netcdf
"""

def __init__(
self,
# Fixed attribute set for store instances: no per-instance __dict__,
# which saves memory and turns typo'd attribute assignments into errors.
__slots__ = (
    "autoclose",
    "format",
    "is_remote",
    "lock",
    "_filename",
    "_group",
    "_manager",
    "_mode",
)

def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
    """Initialize the store from a file manager or an open h5netcdf object.

    Parameters
    ----------
    manager : FileManager or h5netcdf.File or h5netcdf.Group
        Either a file manager (normal path, via :meth:`open`) or an
        already-open h5netcdf file/group supplied by the caller.
    group : str, optional
        Group path within the file. Only valid together with an
        ``h5netcdf.File`` (for a ``Group`` the path is derived instead).
    mode : str, optional
        File mode the underlying file was/will be opened with.
    lock : optional
        Lock (or lockable) guarding HDF5 access; wrapped via ``ensure_lock``.
    autoclose : bool, optional
        Whether to close the file after each access.
    """
    import h5netcdf

    if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
        if group is None:
            # Derive the group path from the object's position in the file.
            root, group = find_root_and_group(manager)
        else:
            # An explicit group only makes sense relative to a root File;
            # use `is not` (exact type check intended, not isinstance).
            if type(manager) is not h5netcdf.File:
                raise ValueError(
                    "must supply a h5netcdf.File if the group "
                    "argument is provided"
                )
            root = manager
        # Wrap the caller-owned handle so the store never closes/reopens it.
        manager = DummyFileManager(root)

    self._manager = manager
    self._group = group
    self._mode = mode
    self.format = None
    # todo: utilizing find_root_and_group seems a bit clunky
    # making filename available on h5netcdf.Group seems better
    self._filename = find_root_and_group(self.ds)[0].filename
    self.is_remote = is_remote_uri(self._filename)
    self.lock = ensure_lock(lock)
    self.autoclose = autoclose

@classmethod
def open(
cls,
filename,
mode="r",
format=None,
Expand All @@ -86,22 +125,14 @@ def __init__(

kwargs = {"invalid_netcdf": invalid_netcdf}

self._manager = CachingFileManager(
h5netcdf.File, filename, mode=mode, kwargs=kwargs
)

if lock is None:
if mode == "r":
lock = HDF5_LOCK
else:
lock = combine_locks([HDF5_LOCK, get_write_lock(filename)])

self._group = group
self.format = format
self._filename = filename
self._mode = mode
self.lock = ensure_lock(lock)
self.autoclose = autoclose
manager = CachingFileManager(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

def _acquire(self, needs_lock=True):
with self._manager.acquire_context(needs_lock) as root:
Expand Down
23 changes: 22 additions & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2182,7 +2182,7 @@ class TestH5NetCDFData(NetCDF4Base):
@contextlib.contextmanager
def create_store(self):
with create_tmp_file() as tmp_file:
yield backends.H5NetCDFStore(tmp_file, "w")
yield backends.H5NetCDFStore.open(tmp_file, "w")

@pytest.mark.filterwarnings("ignore:complex dtypes are supported by h5py")
@pytest.mark.parametrize(
Expand Down Expand Up @@ -2345,6 +2345,27 @@ def test_dump_encodings_h5py(self):
assert actual.x.encoding["compression"] == "lzf"
assert actual.x.encoding["compression_opts"] is None

def test_already_open_dataset_group(self):
    """A store built from an open h5netcdf File/Group reads the group's data."""
    import h5netcdf

    with create_tmp_file() as tmp_file:
        # Write a file containing group "g" with a single scalar variable.
        with nc4.Dataset(tmp_file, mode="w") as nc:
            grp = nc.createGroup("g")
            var = grp.createVariable("x", "int")
            var[...] = 42

        expected = Dataset({"x": ((), 42)})

        # Case 1: pass an already-open h5netcdf.Group directly.
        h5file = h5netcdf.File(tmp_file, mode="r")
        store = backends.H5NetCDFStore(h5file["g"])
        with open_dataset(store) as actual:
            assert_identical(expected, actual)

        # Case 2: pass an open h5netcdf.File plus an explicit group name.
        h5file = h5netcdf.File(tmp_file, mode="r")
        store = backends.H5NetCDFStore(h5file, group="g")
        with open_dataset(store) as actual:
            assert_identical(expected, actual)


@requires_h5netcdf
class TestH5NetCDFFileObject(TestH5NetCDFData):
Expand Down