diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 639789d69b0..6e3dbe76ae4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,7 +65,8 @@ Performance in :py:class:`~xarray.indexing.VectorizedIndexer` and :py:class:`~xarray.indexing.OuterIndexer` (:issue:`10316`). By `Jesse Rusak `_. - +- Speed up encoding of :py:class:`cftime.datetime` objects by roughly a factor + of three (:pull:`8324`). By `Antoine Gibek `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 2112170f7fc..e6bc8ca59bd 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -946,41 +946,38 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray else: cftime = attempt_import("cftime") + dates = np.asarray(dates) + original_shape = dates.shape + if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision dates = dates.astype("M8[us]").astype(datetime) - def wrap_dt(dt): - # convert to cftime proleptic gregorian in case of datetime.datetime - # needed because of https://github.com/Unidata/cftime/issues/354 - if isinstance(dt, datetime) and not isinstance(dt, cftime.datetime): - dt = cftime.datetime( - dt.year, - dt.month, - dt.day, - dt.hour, - dt.minute, - dt.second, - dt.microsecond, - calendar="proleptic_gregorian", - ) - return dt + dates = np.atleast_1d(dates) - def encode_datetime(d): - # Since netCDF files do not support storing float128 values, we ensure - # that float64 values are used by setting longdouble=False in num2date. - # This try except logic can be removed when xarray's minimum version of - # cftime is at least 1.6.2. 
- try: - return ( - np.nan - if d is None - else cftime.date2num(wrap_dt(d), units, calendar, longdouble=False) - ) - except TypeError: - return np.nan if d is None else cftime.date2num(wrap_dt(d), units, calendar) + # Find all the None positions + none_position = dates == None # noqa: E711 + filtered_dates = dates[~none_position] + + # Since netCDF files do not support storing float128 values, we ensure + # that float64 values are used by setting longdouble=False in date2num. + # This try except logic can be removed when xarray's minimum version of + # cftime is at least 1.6.2. + try: + encoded_nums = cftime.date2num( + filtered_dates, units, calendar, longdouble=False + ) + except TypeError: + encoded_nums = cftime.date2num(filtered_dates, units, calendar) + + if filtered_dates.size == none_position.size: + return encoded_nums.reshape(original_shape) - return reshape(np.array([encode_datetime(d) for d in ravel(dates)]), dates.shape) + # Create a full array of NaN + # and fill the encoded dates into the non-None positions + result = np.full(dates.shape, np.nan) + result[np.nonzero(~none_position)] = encoded_nums + return result.reshape(original_shape) def cast_to_int_if_safe(num) -> np.ndarray: