diff --git a/.travis.yml b/.travis.yml index ee8ffcc4d5e..70c0a63ae08 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,6 +45,8 @@ matrix: env: CONDA_ENV=py36-rasterio1.0alpha - python: 3.6 env: CONDA_ENV=py36-zarr-dev + - python: 3.6 + env: CONDA_ENV=py36-netcdftime-dev - python: 3.5 env: CONDA_ENV=docs allow_failures: @@ -73,6 +75,8 @@ matrix: env: CONDA_ENV=py36-rasterio1.0alpha - python: 3.6 env: CONDA_ENV=py36-zarr-dev + - python: 3.6 + env: CONDA_ENV=py36-netcdftime-dev before_install: - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then diff --git a/ci/requirements-py36-netcdftime-dev.yml b/ci/requirements-py36-netcdftime-dev.yml new file mode 100644 index 00000000000..5c2193474b4 --- /dev/null +++ b/ci/requirements-py36-netcdftime-dev.yml @@ -0,0 +1,13 @@ +name: test_env +channels: + - conda-forge +dependencies: + - python=3.6 + - pytest + - flake8 + - numpy + - pandas + - netcdftime + - pip: + - coveralls + - pytest-cov diff --git a/doc/installing.rst b/doc/installing.rst index b9a1fff59cc..8be025665e2 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -25,6 +25,8 @@ For netCDF and IO - `pynio `__: for reading GRIB and other geoscience specific file formats - `zarr `__: for chunked, compressed, N-dimensional arrays. +- `netcdftime `__: recommended if you + want to encode/decode datetimes for non-standard calendars. For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bc5a5bb5ea4..1899a29c5e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -93,6 +93,10 @@ Enhancements - Speed of reindexing/alignment with dask array is orders of magnitude faster when inserting missing values (:issue:`1847`). By `Stephan Hoyer `_. +- Add ``netcdftime`` as an optional dependency of xarray. This allows for + encoding/decoding of datetimes with non-standard calendars without the + netCDF4 dependency (:issue:`1084`). + By `Joe Hamman `_. .. _Zarr: http://zarr.readthedocs.io/ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 668fb53899d..1effdf18dac 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -443,7 +443,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, lock=None, data_vars='all', coords='different', **kwargs): """Open multiple files as a single dataset. - Requires dask to be installed. See documentation for details on dask [1]. + Requires dask to be installed. See documentation for details on dask [1]. Attributes from the first dataset file are used for the combined dataset. Parameters diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e00769af884..28afc46f660 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -40,6 +40,26 @@ 'milliseconds', 'microseconds']) +def _import_netcdftime(): + ''' + helper function handle the transition to netcdftime as a stand-alone + package + ''' + try: + # Try importing netcdftime directly + import netcdftime as nctime + if not hasattr(nctime, 'num2date'): + # must have gotten an old version from netcdf4-python + raise ImportError + except ImportError: + # in netCDF4 the num2date/date2num function are top-level api + try: + import netCDF4 as nctime + except ImportError: + raise ImportError("Failed to import netcdftime") + return nctime + + def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith('s'): @@ -59,15 +79,15 @@ def _unpack_netcdf_time_units(units): return delta_units, ref_date -def _decode_datetime_with_netcdf4(num_dates, units, calendar): - import netCDF4 as nc4 +def _decode_datetime_with_netcdftime(num_dates, units, calendar): + nctime = _import_netcdftime() - dates = np.asarray(nc4.num2date(num_dates, units, calendar)) + dates = np.asarray(nctime.num2date(num_dates, units, calendar)) if (dates[np.nanargmin(num_dates)].year < 1678 or dates[np.nanargmax(num_dates)].year >= 2262): warnings.warn('Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using dummy ' - 'netCDF4.datetime objects instead, reason: dates out' + 'netcdftime.datetime objects instead, reason: dates out' ' of range', SerializationWarning, stacklevel=3) else: try: @@ -75,7 +95,7 @@ def _decode_datetime_with_netcdf4(num_dates, units, calendar): except ValueError as e: warnings.warn('Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using ' - 'dummy netCDF4.datetime objects instead, reason:' + 'dummy netcdftime.datetime objects instead, reason:' '{0}'.format(e), SerializationWarning, stacklevel=3) return dates @@ -111,7 +131,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): numpy array of date time objects. For standard (Gregorian) calendars, this function uses vectorized - operations, which makes it much faster than netCDF4.num2date. In such a + operations, which makes it much faster than netcdftime.num2date. In such a case, the returned array will be of type np.datetime64. Note that time unit in `units` must not be smaller than microseconds and @@ -119,7 +139,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): See also -------- - netCDF4.num2date + netcdftime.num2date """ num_dates = np.asarray(num_dates) flat_num_dates = num_dates.ravel() @@ -137,7 +157,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): ref_date = pd.Timestamp(ref_date) except ValueError: # ValueError is raised by pd.Timestamp for non-ISO timestamp - # strings, in which case we fall back to using netCDF4 + # strings, in which case we fall back to using netcdftime raise OutOfBoundsDatetime # fixes: https://github.com/pydata/pandas/issues/14068 @@ -155,9 +175,8 @@ def decode_cf_datetime(num_dates, units, calendar=None): ref_date).values except (OutOfBoundsDatetime, OverflowError): - dates = _decode_datetime_with_netcdf4(flat_num_dates.astype(np.float), - units, - calendar) + dates = _decode_datetime_with_netcdftime( + flat_num_dates.astype(np.float), units, calendar) return dates.reshape(num_dates.shape) @@ -215,7 +234,7 @@ def infer_timedelta_units(deltas): def nctime_to_nptime(times): - """Given an array of netCDF4.datetime objects, return an array of + """Given an array of netcdftime.datetime objects, return an array of numpy.datetime64 objects of the same size""" times = np.asarray(times) new = np.empty(times.shape, dtype='M8[ns]') @@ -235,20 +254,20 @@ def _cleanup_netcdf_time_units(units): return units -def _encode_datetime_with_netcdf4(dates, units, calendar): - """Fallback method for encoding dates using netCDF4-python. +def _encode_datetime_with_netcdftime(dates, units, calendar): + """Fallback method for encoding dates using netcdftime. This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. """ - import netCDF4 as nc4 + nctime = _import_netcdftime() if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision dates = dates.astype('M8[us]').astype(datetime) def encode_datetime(d): - return np.nan if d is None else nc4.date2num(d, units, calendar) + return np.nan if d is None else nctime.date2num(d, units, calendar) return np.vectorize(encode_datetime)(dates) @@ -268,7 +287,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): See also -------- - netCDF4.date2num + netcdftime.date2num """ dates = np.asarray(dates) @@ -283,7 +302,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): delta, ref_date = _unpack_netcdf_time_units(units) try: if calendar not in _STANDARD_CALENDARS or dates.dtype.kind == 'O': - # parse with netCDF4 instead + # parse with netcdftime instead raise OutOfBoundsDatetime assert dates.dtype == 'datetime64[ns]' @@ -293,7 +312,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): num = (dates - ref_date) / time_delta except (OutOfBoundsDatetime, OverflowError): - num = _encode_datetime_with_netcdf4(dates, units, calendar) + num = _encode_datetime_with_netcdftime(dates, units, calendar) num = cast_to_int_if_safe(num) return (num, units, calendar) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index dadcdeff640..7c9528d741d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,6 +68,7 @@ def _importorskip(modname, minversion=None): has_netCDF4, requires_netCDF4 = _importorskip('netCDF4') has_h5netcdf, requires_h5netcdf = _importorskip('h5netcdf') has_pynio, requires_pynio = _importorskip('Nio') +has_netcdftime, requires_netcdftime = _importorskip('netcdftime') has_dask, requires_dask = _importorskip('dask') has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 6a509368017..4520e7aefef 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -13,8 +13,8 @@ from xarray.core import utils, indexing from xarray.testing import assert_identical from . import ( - TestCase, requires_netCDF4, unittest, raises_regex, IndexerMaker, - assert_array_equal) + TestCase, requires_netCDF4, requires_netcdftime, unittest, raises_regex, + IndexerMaker, assert_array_equal) from .test_backends import CFEncodedDataTest from xarray.core.pycompat import iteritems from xarray.backends.memory import InMemoryDataStore @@ -181,7 +181,7 @@ def test_decode_cf_with_conflicting_fill_missing_value(): assert_identical(actual, expected) -@requires_netCDF4 +@requires_netcdftime class TestEncodeCFVariable(TestCase): def test_incompatible_attributes(self): invalid_vars = [ @@ -237,7 +237,7 @@ def test_multidimensional_coordinates(self): assert 'coordinates' not in attrs -@requires_netCDF4 +@requires_netcdftime class TestDecodeCF(TestCase): def test_dataset(self): original = Dataset({ @@ -303,7 +303,7 @@ def test_invalid_time_units_raises_eagerly(self): with raises_regex(ValueError, 'unable to decode time'): decode_cf(ds) - @requires_netCDF4 + @requires_netcdftime def test_dataset_repr_with_netcdf4_datetimes(self): # regression test for #347 attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'} @@ -316,7 +316,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self): ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)})) assert '(time) datetime64[ns]' in repr(ds) - @requires_netCDF4 + @requires_netcdftime def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime