Skip to content

Add netcdftime as an optional dependency. #1920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ matrix:
env: CONDA_ENV=py36-rasterio1.0alpha
- python: 3.6
env: CONDA_ENV=py36-zarr-dev
- python: 3.6
env: CONDA_ENV=py36-netcdftime-dev
- python: 3.5
env: CONDA_ENV=docs
allow_failures:
Expand Down Expand Up @@ -73,6 +75,8 @@ matrix:
env: CONDA_ENV=py36-rasterio1.0alpha
- python: 3.6
env: CONDA_ENV=py36-zarr-dev
- python: 3.6
env: CONDA_ENV=py36-netcdftime-dev

before_install:
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
Expand Down
13 changes: 13 additions & 0 deletions ci/requirements-py36-netcdftime-dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: test_env
channels:
- conda-forge
dependencies:
- python=3.6
- pytest
- flake8
- numpy
- pandas
- netcdftime
- pip:
  - coveralls
  - pytest-cov
2 changes: 2 additions & 0 deletions doc/installing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ For netCDF and IO
- `pynio <https://www.pyngl.ucar.edu/Nio.shtml>`__: for reading GRIB and other
geoscience specific file formats
- `zarr <http://zarr.readthedocs.io/>`__: for chunked, compressed, N-dimensional arrays.
- `netcdftime <https://github.com/Unidata/netcdftime>`__: recommended if you
want to encode/decode datetimes for non-standard calendars.

For accelerating xarray
~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ Enhancements
- Speed of reindexing/alignment with dask array is orders of magnitude faster
when inserting missing values (:issue:`1847`).
By `Stephan Hoyer <https://github.com/shoyer>`_.
- Add ``netcdftime`` as an optional dependency of xarray. This allows for
encoding/decoding of datetimes with non-standard calendars without the
netCDF4 dependency (:issue:`1084`).
By `Joe Hamman <https://github.com/jhamman>`_.

.. _Zarr: http://zarr.readthedocs.io/

Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
lock=None, data_vars='all', coords='different', **kwargs):
"""Open multiple files as a single dataset.

Requires dask to be installed. See documentation for details on dask [1].
Requires dask to be installed. See documentation for details on dask [1].
Attributes from the first dataset file are used for the combined dataset.

Parameters
Expand Down
57 changes: 38 additions & 19 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,26 @@
'milliseconds', 'microseconds'])


def _import_netcdftime():
    """Return a module exposing ``num2date`` and ``date2num``.

    Helper to handle the transition to netcdftime as a stand-alone package.
    The stand-alone ``netcdftime`` package is preferred. If it cannot be
    imported, or if the importable ``netcdftime`` lacks either function
    (e.g. an old copy obtained from netcdf4-python), ``netCDF4`` is used
    instead, where both functions are part of the top-level API.

    Returns
    -------
    module
        Either ``netcdftime`` or ``netCDF4``.

    Raises
    ------
    ImportError
        If neither package can be imported.
    """
    try:
        # Try importing netcdftime directly
        import netcdftime as nctime
        # Callers use both num2date (decoding) and date2num (encoding), so
        # require both before accepting this module.
        if not all(hasattr(nctime, name)
                   for name in ('num2date', 'date2num')):
            # must have gotten an old version from netcdf4-python
            raise ImportError
    except ImportError:
        # in netCDF4 the num2date/date2num functions are top-level api
        try:
            import netCDF4 as nctime
        except ImportError:
            raise ImportError("Failed to import netcdftime")
    return nctime


def _netcdf_to_numpy_timeunit(units):
units = units.lower()
if not units.endswith('s'):
Expand All @@ -59,23 +79,23 @@ def _unpack_netcdf_time_units(units):
return delta_units, ref_date


def _decode_datetime_with_netcdf4(num_dates, units, calendar):
import netCDF4 as nc4
def _decode_datetime_with_netcdftime(num_dates, units, calendar):
nctime = _import_netcdftime()

dates = np.asarray(nc4.num2date(num_dates, units, calendar))
dates = np.asarray(nctime.num2date(num_dates, units, calendar))
if (dates[np.nanargmin(num_dates)].year < 1678 or
dates[np.nanargmax(num_dates)].year >= 2262):
warnings.warn('Unable to decode time axis into full '
'numpy.datetime64 objects, continuing using dummy '
'netCDF4.datetime objects instead, reason: dates out'
'netcdftime.datetime objects instead, reason: dates out'
' of range', SerializationWarning, stacklevel=3)
else:
try:
dates = nctime_to_nptime(dates)
except ValueError as e:
warnings.warn('Unable to decode time axis into full '
'numpy.datetime64 objects, continuing using '
'dummy netCDF4.datetime objects instead, reason:'
'dummy netcdftime.datetime objects instead, reason:'
'{0}'.format(e), SerializationWarning, stacklevel=3)
return dates

Expand Down Expand Up @@ -111,15 +131,15 @@ def decode_cf_datetime(num_dates, units, calendar=None):
numpy array of date time objects.

For standard (Gregorian) calendars, this function uses vectorized
operations, which makes it much faster than netCDF4.num2date. In such a
operations, which makes it much faster than netcdftime.num2date. In such a
case, the returned array will be of type np.datetime64.

Note that time unit in `units` must not be smaller than microseconds and
not larger than days.

See also
--------
netCDF4.num2date
netcdftime.num2date
"""
num_dates = np.asarray(num_dates)
flat_num_dates = num_dates.ravel()
Expand All @@ -137,7 +157,7 @@ def decode_cf_datetime(num_dates, units, calendar=None):
ref_date = pd.Timestamp(ref_date)
except ValueError:
# ValueError is raised by pd.Timestamp for non-ISO timestamp
# strings, in which case we fall back to using netCDF4
# strings, in which case we fall back to using netcdftime
raise OutOfBoundsDatetime

# fixes: https://github.com/pydata/pandas/issues/14068
Expand All @@ -155,9 +175,8 @@ def decode_cf_datetime(num_dates, units, calendar=None):
ref_date).values

except (OutOfBoundsDatetime, OverflowError):
dates = _decode_datetime_with_netcdf4(flat_num_dates.astype(np.float),
units,
calendar)
dates = _decode_datetime_with_netcdftime(
flat_num_dates.astype(np.float), units, calendar)

return dates.reshape(num_dates.shape)

Expand Down Expand Up @@ -215,7 +234,7 @@ def infer_timedelta_units(deltas):


def nctime_to_nptime(times):
"""Given an array of netCDF4.datetime objects, return an array of
"""Given an array of netcdftime.datetime objects, return an array of
numpy.datetime64 objects of the same size"""
times = np.asarray(times)
new = np.empty(times.shape, dtype='M8[ns]')
Expand All @@ -235,20 +254,20 @@ def _cleanup_netcdf_time_units(units):
return units


def _encode_datetime_with_netcdf4(dates, units, calendar):
"""Fallback method for encoding dates using netCDF4-python.
def _encode_datetime_with_netcdftime(dates, units, calendar):
"""Fallback method for encoding dates using netcdftime.

This method is more flexible than xarray's parsing using datetime64[ns]
arrays but also slower because it loops over each element.
"""
import netCDF4 as nc4
nctime = _import_netcdftime()

if np.issubdtype(dates.dtype, np.datetime64):
# numpy's broken datetime conversion only works for us precision
dates = dates.astype('M8[us]').astype(datetime)

def encode_datetime(d):
return np.nan if d is None else nc4.date2num(d, units, calendar)
return np.nan if d is None else nctime.date2num(d, units, calendar)

return np.vectorize(encode_datetime)(dates)

Expand All @@ -268,7 +287,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):

See also
--------
netCDF4.date2num
netcdftime.date2num
"""
dates = np.asarray(dates)

Expand All @@ -283,7 +302,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
delta, ref_date = _unpack_netcdf_time_units(units)
try:
if calendar not in _STANDARD_CALENDARS or dates.dtype.kind == 'O':
# parse with netCDF4 instead
# parse with netcdftime instead
raise OutOfBoundsDatetime
assert dates.dtype == 'datetime64[ns]'

Expand All @@ -293,7 +312,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
num = (dates - ref_date) / time_delta

except (OutOfBoundsDatetime, OverflowError):
num = _encode_datetime_with_netcdf4(dates, units, calendar)
num = _encode_datetime_with_netcdftime(dates, units, calendar)

num = cast_to_int_if_safe(num)
return (num, units, calendar)
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def _importorskip(modname, minversion=None):
has_netCDF4, requires_netCDF4 = _importorskip('netCDF4')
has_h5netcdf, requires_h5netcdf = _importorskip('h5netcdf')
has_pynio, requires_pynio = _importorskip('Nio')
has_netcdftime, requires_netcdftime = _importorskip('netcdftime')
has_dask, requires_dask = _importorskip('dask')
has_bottleneck, requires_bottleneck = _importorskip('bottleneck')
has_rasterio, requires_rasterio = _importorskip('rasterio')
Expand Down
12 changes: 6 additions & 6 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from xarray.core import utils, indexing
from xarray.testing import assert_identical
from . import (
TestCase, requires_netCDF4, unittest, raises_regex, IndexerMaker,
assert_array_equal)
TestCase, requires_netCDF4, requires_netcdftime, unittest, raises_regex,
IndexerMaker, assert_array_equal)
from .test_backends import CFEncodedDataTest
from xarray.core.pycompat import iteritems
from xarray.backends.memory import InMemoryDataStore
Expand Down Expand Up @@ -181,7 +181,7 @@ def test_decode_cf_with_conflicting_fill_missing_value():
assert_identical(actual, expected)


@requires_netCDF4
@requires_netcdftime
class TestEncodeCFVariable(TestCase):
def test_incompatible_attributes(self):
invalid_vars = [
Expand Down Expand Up @@ -237,7 +237,7 @@ def test_multidimensional_coordinates(self):
assert 'coordinates' not in attrs


@requires_netCDF4
@requires_netcdftime
class TestDecodeCF(TestCase):
def test_dataset(self):
original = Dataset({
Expand Down Expand Up @@ -303,7 +303,7 @@ def test_invalid_time_units_raises_eagerly(self):
with raises_regex(ValueError, 'unable to decode time'):
decode_cf(ds)

@requires_netCDF4
@requires_netcdftime
def test_dataset_repr_with_netcdf4_datetimes(self):
# regression test for #347
attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
Expand All @@ -316,7 +316,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self):
ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)}))
assert '(time) datetime64[ns]' in repr(ds)

@requires_netCDF4
@requires_netcdftime
def test_decode_cf_datetime_transition_to_invalid(self):
# manually create dataset with not-decoded date
from datetime import datetime
Expand Down