From f0a8019ec5c1845bbd4dedc50e7bf9831c4a2018 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 13:44:34 -0800 Subject: [PATCH 1/8] rework imports to support using netcdftime package when netcdf4-python is not installed --- doc/installing.rst | 2 ++ doc/whats-new.rst | 4 +++ xarray/backends/api.py | 2 +- xarray/coding/times.py | 56 +++++++++++++++++++++----------- xarray/tests/__init__.py | 1 + xarray/tests/test_conventions.py | 12 +++---- 6 files changed, 51 insertions(+), 26 deletions(-) diff --git a/doc/installing.rst b/doc/installing.rst index b9a1fff59cc..8be025665e2 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -25,6 +25,8 @@ For netCDF and IO - `pynio `__: for reading GRIB and other geoscience specific file formats - `zarr `__: for chunked, compressed, N-dimensional arrays. +- `netcdftime `__: recommended if you + want to encode/decode datetimes for non-standard calendars. For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bc5a5bb5ea4..1899a29c5e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -93,6 +93,10 @@ Enhancements - Speed of reindexing/alignment with dask array is orders of magnitude faster when inserting missing values (:issue:`1847`). By `Stephan Hoyer `_. +- Add ``netcdftime`` as an optional dependency of xarray. This allows for + encoding/decoding of datetimes with non-standard calendars without the + netCDF4 dependency (:issue:`1084`). + By `Joe Hamman `_. .. _Zarr: http://zarr.readthedocs.io/ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 668fb53899d..1effdf18dac 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -443,7 +443,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, lock=None, data_vars='all', coords='different', **kwargs): """Open multiple files as a single dataset. - Requires dask to be installed. See documentation for details on dask [1]. + Requires dask to be installed. See documentation for details on dask [1]. Attributes from the first dataset file are used for the combined dataset. Parameters diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e00769af884..14cd11f7f4e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -40,6 +40,25 @@ 'milliseconds', 'microseconds']) +def _import_netcdftime(): + ''' + helper function handle the transition to netcdftime as a stand-alone + package + ''' + try: + # in netCDF4 the num2date/date2num function are top-level api + import netCDF4 as nctime + except ImportError: + # fallback if netCDF4-python is not installed. Try importing netcdftime + # directly + try: + import netcdftime as nctime + except: + raise ImportError( + "Either the netcdftime or the netCDF4 package is required") + return nctime + + def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith('s'): @@ -59,15 +78,15 @@ def _unpack_netcdf_time_units(units): return delta_units, ref_date -def _decode_datetime_with_netcdf4(num_dates, units, calendar): - import netCDF4 as nc4 +def _decode_datetime_with_netcdftime(num_dates, units, calendar): + nctime = _import_netcdftime() - dates = np.asarray(nc4.num2date(num_dates, units, calendar)) + dates = np.asarray(nctime.num2date(num_dates, units, calendar)) if (dates[np.nanargmin(num_dates)].year < 1678 or dates[np.nanargmax(num_dates)].year >= 2262): warnings.warn('Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using dummy ' - 'netCDF4.datetime objects instead, reason: dates out' + 'netcdftime.datetime objects instead, reason: dates out' ' of range', SerializationWarning, stacklevel=3) else: try: @@ -75,7 +94,7 @@ def _decode_datetime_with_netcdf4(num_dates, units, calendar): except ValueError as e: warnings.warn('Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using ' - 'dummy netCDF4.datetime objects instead, reason:' + 'dummy netcdftime.datetime objects instead, reason:' '{0}'.format(e), SerializationWarning, stacklevel=3) return dates @@ -111,7 +130,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): numpy array of date time objects. For standard (Gregorian) calendars, this function uses vectorized - operations, which makes it much faster than netCDF4.num2date. In such a + operations, which makes it much faster than netcdftime.num2date. In such a case, the returned array will be of type np.datetime64. Note that time unit in `units` must not be smaller than microseconds and @@ -119,7 +138,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): See also -------- - netCDF4.num2date + netcdftime.num2date """ num_dates = np.asarray(num_dates) flat_num_dates = num_dates.ravel() @@ -137,7 +156,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): ref_date = pd.Timestamp(ref_date) except ValueError: # ValueError is raised by pd.Timestamp for non-ISO timestamp - # strings, in which case we fall back to using netCDF4 + # strings, in which case we fall back to using netcdftime raise OutOfBoundsDatetime # fixes: https://github.com/pydata/pandas/issues/14068 @@ -155,9 +174,8 @@ def decode_cf_datetime(num_dates, units, calendar=None): ref_date).values except (OutOfBoundsDatetime, OverflowError): - dates = _decode_datetime_with_netcdf4(flat_num_dates.astype(np.float), - units, - calendar) + dates = _decode_datetime_with_netcdftime( + flat_num_dates.astype(np.float), units, calendar) return dates.reshape(num_dates.shape) @@ -215,7 +233,7 @@ def infer_timedelta_units(deltas): def nctime_to_nptime(times): - """Given an array of netCDF4.datetime objects, return an array of + """Given an array of netcdftime.datetime objects, return an array of numpy.datetime64 objects of the same size""" times = np.asarray(times) new = np.empty(times.shape, dtype='M8[ns]') @@ -235,20 +253,20 @@ def _cleanup_netcdf_time_units(units): return units -def _encode_datetime_with_netcdf4(dates, units, calendar): - """Fallback method for encoding dates using netCDF4-python. +def _encode_datetime_with_netcdftime(dates, units, calendar): + """Fallback method for encoding dates using netcdftime. This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. """ - import netCDF4 as nc4 + nctime = _import_netcdftime() if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision dates = dates.astype('M8[us]').astype(datetime) def encode_datetime(d): - return np.nan if d is None else nc4.date2num(d, units, calendar) + return np.nan if d is None else nctime.date2num(d, units, calendar) return np.vectorize(encode_datetime)(dates) @@ -268,7 +286,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): See also -------- - netCDF4.date2num + netcdftime.date2num """ dates = np.asarray(dates) @@ -283,7 +301,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): delta, ref_date = _unpack_netcdf_time_units(units) try: if calendar not in _STANDARD_CALENDARS or dates.dtype.kind == 'O': - # parse with netCDF4 instead + # parse with netcdftime instead raise OutOfBoundsDatetime assert dates.dtype == 'datetime64[ns]' @@ -293,7 +311,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): num = (dates - ref_date) / time_delta except (OutOfBoundsDatetime, OverflowError): - num = _encode_datetime_with_netcdf4(dates, units, calendar) + num = _encode_datetime_with_netcdftime(dates, units, calendar) num = cast_to_int_if_safe(num) return (num, units, calendar) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index dadcdeff640..7c9528d741d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,6 +68,7 @@ def _importorskip(modname, minversion=None): has_netCDF4, requires_netCDF4 = _importorskip('netCDF4') has_h5netcdf, requires_h5netcdf = _importorskip('h5netcdf') has_pynio, requires_pynio = _importorskip('Nio') +has_netcdftime, requires_netcdftime = _importorskip('netcdftime') has_dask, requires_dask = _importorskip('dask') has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 6a509368017..0bc14204822 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -13,7 +13,7 @@ from xarray.core import utils, indexing from xarray.testing import assert_identical from . import ( - TestCase, requires_netCDF4, unittest, raises_regex, IndexerMaker, + TestCase, requires_netcdftime, unittest, raises_regex, IndexerMaker, assert_array_equal) from .test_backends import CFEncodedDataTest from xarray.core.pycompat import iteritems @@ -181,7 +181,7 @@ def test_decode_cf_with_conflicting_fill_missing_value(): assert_identical(actual, expected) -@requires_netCDF4 +@requires_netcdftime class TestEncodeCFVariable(TestCase): def test_incompatible_attributes(self): invalid_vars = [ @@ -237,7 +237,7 @@ def test_multidimensional_coordinates(self): assert 'coordinates' not in attrs -@requires_netCDF4 +@requires_netcdftime class TestDecodeCF(TestCase): def test_dataset(self): original = Dataset({ @@ -303,7 +303,7 @@ def test_invalid_time_units_raises_eagerly(self): with raises_regex(ValueError, 'unable to decode time'): decode_cf(ds) - @requires_netCDF4 + @requires_netcdftime def test_dataset_repr_with_netcdf4_datetimes(self): # regression test for #347 attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'} @@ -316,7 +316,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self): ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)})) assert '(time) datetime64[ns]' in repr(ds) - @requires_netCDF4 + @requires_netcdftime def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime @@ -358,7 +358,7 @@ def null_wrap(ds): return InMemoryDataStore(variables=variables, attributes=ds.attrs) -@requires_netCDF4 +@requires_netcdftime class TestCFEncodedDataStore(CFEncodedDataTest, TestCase): @contextlib.contextmanager def create_store(self): From 7dcd06c88c07b7f5f65a321419abace9248ef014 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 13:48:17 -0800 Subject: [PATCH 2/8] temporary travis build for netcdftime dev --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index ee8ffcc4d5e..70c0a63ae08 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,6 +45,8 @@ matrix: env: CONDA_ENV=py36-rasterio1.0alpha - python: 3.6 env: CONDA_ENV=py36-zarr-dev + - python: 3.6 + env: CONDA_ENV=py36-netcdftime-dev - python: 3.5 env: CONDA_ENV=docs allow_failures: @@ -73,6 +75,8 @@ matrix: env: CONDA_ENV=py36-rasterio1.0alpha - python: 3.6 env: CONDA_ENV=py36-zarr-dev + - python: 3.6 + env: CONDA_ENV=py36-netcdftime-dev before_install: - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then From ec1015c190620965bd7a3a2b82b4c45279306a10 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 14:45:29 -0800 Subject: [PATCH 3/8] flake8 and rework import logic --- xarray/coding/times.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 14cd11f7f4e..28afc46f660 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -46,16 +46,17 @@ def _import_netcdftime(): package ''' try: - # in netCDF4 the num2date/date2num function are top-level api - import netCDF4 as nctime + # Try importing netcdftime directly + import netcdftime as nctime + if not hasattr(nctime, 'num2date'): + # must have gotten an old version from netcdf4-python + raise ImportError except ImportError: - # fallback if netCDF4-python is not installed. Try importing netcdftime - # directly + # in netCDF4 the num2date/date2num function are top-level api try: - import netcdftime as nctime - except: - raise ImportError( - "Either the netcdftime or the netCDF4 package is required") + import netCDF4 as nctime + except ImportError: + raise ImportError("Failed to import netcdftime") return nctime From eb2c12db723099416a1e27419118ad0864496c38 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 14:46:32 -0800 Subject: [PATCH 4/8] add missing netcdftime environment --- ci/requirements-py36-netcdftime-dev.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 ci/requirements-py36-netcdftime-dev.yml diff --git a/ci/requirements-py36-netcdftime-dev.yml b/ci/requirements-py36-netcdftime-dev.yml new file mode 100644 index 00000000000..54f138a51bc --- /dev/null +++ b/ci/requirements-py36-netcdftime-dev.yml @@ -0,0 +1,13 @@ +name: test_env +channels: + - conda-forge +dependencies: + - python=3.6 + - pytest + - flake8 + - numpy + - pandas + - pip: + - coveralls + - pytest-cov + - git+https://github.com/nidata/netcdftime.git From 6117d8c8f0ef3aa4709102c4dcc2c3654b5aa8c3 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 16:38:33 -0800 Subject: [PATCH 5/8] fix typo in unidata --- ci/requirements-py36-netcdftime-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-py36-netcdftime-dev.yml b/ci/requirements-py36-netcdftime-dev.yml index 54f138a51bc..b199e4bb192 100644 --- a/ci/requirements-py36-netcdftime-dev.yml +++ b/ci/requirements-py36-netcdftime-dev.yml @@ -10,4 +10,4 @@ dependencies: - pip: - coveralls - pytest-cov - - git+https://github.com/nidata/netcdftime.git + - git+https://github.com/Unidata/netcdftime.git From 7487d3e731c110507318e4a9dabaca73c7b70e27 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 18:02:14 -0800 Subject: [PATCH 6/8] cython too --- ci/requirements-py36-netcdftime-dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/requirements-py36-netcdftime-dev.yml b/ci/requirements-py36-netcdftime-dev.yml index b199e4bb192..a752c5f1705 100644 --- a/ci/requirements-py36-netcdftime-dev.yml +++ b/ci/requirements-py36-netcdftime-dev.yml @@ -7,6 +7,7 @@ dependencies: - flake8 - numpy - pandas + - cython - pip: - coveralls - pytest-cov From fed209e1d366abef8e53356e24cc592057cf3862 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 16 Feb 2018 21:56:50 -0800 Subject: [PATCH 7/8] use conda-forge --- ci/requirements-py36-netcdftime-dev.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/requirements-py36-netcdftime-dev.yml b/ci/requirements-py36-netcdftime-dev.yml index a752c5f1705..5c2193474b4 100644 --- a/ci/requirements-py36-netcdftime-dev.yml +++ b/ci/requirements-py36-netcdftime-dev.yml @@ -7,8 +7,7 @@ dependencies: - flake8 - numpy - pandas - - cython + - netcdftime - pip: - coveralls - pytest-cov - - git+https://github.com/Unidata/netcdftime.git From c2b35c1adee869d2c77680b305e83ac0679e421c Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 17 Feb 2018 09:27:54 -0800 Subject: [PATCH 8/8] require netcdf4 for tests that read/write --- xarray/tests/test_conventions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 0bc14204822..4520e7aefef 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -13,8 +13,8 @@ from xarray.core import utils, indexing from xarray.testing import assert_identical from . import ( - TestCase, requires_netcdftime, unittest, raises_regex, IndexerMaker, - assert_array_equal) + TestCase, requires_netCDF4, requires_netcdftime, unittest, raises_regex, + IndexerMaker, assert_array_equal) from .test_backends import CFEncodedDataTest from xarray.core.pycompat import iteritems from xarray.backends.memory import InMemoryDataStore @@ -358,7 +358,7 @@ def null_wrap(ds): return InMemoryDataStore(variables=variables, attributes=ds.attrs) -@requires_netcdftime +@requires_netCDF4 class TestCFEncodedDataStore(CFEncodedDataTest, TestCase): @contextlib.contextmanager def create_store(self):