From 3f6bf49d332735a31b087479df373e76adbe3c7a Mon Sep 17 00:00:00 2001 From: Antoine Gibek Date: Sun, 14 Jul 2024 16:43:33 +0200 Subject: [PATCH 1/6] feat: implement as discussed in PR #8322 --- xarray/coding/times.py | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 50a2ba93c09..9feca4f787a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -625,25 +625,37 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray if cftime is None: raise ModuleNotFoundError("No module named 'cftime'") + dates = np.array(dates) + + if dates.shape == (): + dates = dates.reshape(1) + if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision dates = dates.astype("M8[us]").astype(datetime) - def encode_datetime(d): - # Since netCDF files do not support storing float128 values, we ensure - # that float64 values are used by setting longdouble=False in num2date. - # This try except logic can be removed when xarray's minimum version of - # cftime is at least 1.6.2. - try: - return ( - np.nan - if d is None - else cftime.date2num(d, units, calendar, longdouble=False) - ) - except TypeError: - return np.nan if d is None else cftime.date2num(d, units, calendar) + # Find all the None position + none_position = np.equal(dates, None) + + # Remove None from the dates and return new array + filtered_dates = dates[~none_position] - return reshape(np.array([encode_datetime(d) for d in ravel(dates)]), dates.shape) + # Since netCDF files do not support storing float128 values, we ensure + # that float64 values are used by setting longdouble=False in num2date. + # This try except logic can be removed when xarray's minimum version of + # cftime is at least 1.6.2. 
+ try: + encoded_nums = cftime.date2num( + filtered_dates, units, calendar, longdouble=False + ) + except TypeError: + encoded_nums = cftime.date2num(filtered_dates, units, calendar) + + # Create a full matrix of NaN + # And fill the num dates in the not NaN or None position + result = np.full(dates.shape, np.nan) + result[np.nonzero(~none_position)] = encoded_nums + return result def cast_to_int_if_safe(num) -> np.ndarray: From f28fc7dd331419c57a7aa8c0c75ddf73e3b76703 Mon Sep 17 00:00:00 2001 From: Antoine Gibek Date: Sun, 14 Jul 2024 17:27:19 +0200 Subject: [PATCH 2/6] fix: Stick closer to the original function and add PR suggestions --- xarray/coding/times.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 9feca4f787a..14b233d61e2 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -626,18 +626,16 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray raise ModuleNotFoundError("No module named 'cftime'") dates = np.array(dates) - - if dates.shape == (): - dates = dates.reshape(1) + original_shape = dates.shape if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision dates = dates.astype("M8[us]").astype(datetime) + dates = np.atleast_1d(dates) + # Find all the None position none_position = np.equal(dates, None) - - # Remove None from the dates and return new array filtered_dates = dates[~none_position] # Since netCDF files do not support storing float128 values, we ensure @@ -651,11 +649,14 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray except TypeError: encoded_nums = cftime.date2num(filtered_dates, units, calendar) + if filtered_dates.size == none_position.size: + return encoded_nums.reshape(original_shape) + # Create a full matrix of NaN # And fill the num dates in the not NaN or None position result = np.full(dates.shape, np.nan) 
result[np.nonzero(~none_position)] = encoded_nums - return result + return result.reshape(original_shape) def cast_to_int_if_safe(num) -> np.ndarray: From 06af81a6616872d313e83b478a3d614f6c4922e1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 2 Aug 2024 09:29:53 -0600 Subject: [PATCH 3/6] Update xarray/coding/times.py Co-authored-by: Michael Niklas --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index f4554ee54b4..8ba348b49eb 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -628,7 +628,7 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray if cftime is None: raise ModuleNotFoundError("No module named 'cftime'") - dates = np.array(dates) + dates = np.asarray(dates) original_shape = dates.shape if np.issubdtype(dates.dtype, np.datetime64): From 80bb5fe8cb58e237400ebb9a3832d95fd343b070 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 22 Oct 2024 14:28:56 -0600 Subject: [PATCH 4/6] Update xarray/coding/times.py --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 8ba348b49eb..765138b6657 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -638,7 +638,7 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray dates = np.atleast_1d(dates) # Find all the None position - none_position = np.equal(dates, None) + none_position = dates == None filtered_dates = dates[~none_position] # Since netCDF files do not support storing float128 values, we ensure From 042638ddbb6b972853ae06231c69d27c2bd1726c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 23 May 2025 15:00:09 +0200 Subject: [PATCH 5/6] Update xarray/coding/times.py --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py 
index 7c3275e4b6a..17d4cd54ba1 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -959,7 +959,7 @@ def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray dates = np.atleast_1d(dates) # Find all the None position - none_position = dates == None + none_position = dates == None # noqa: E711 filtered_dates = dates[~none_position] # Since netCDF files do not support storing float128 values, we ensure From c83f76ad32157c7792a52c5d61e44c20480a9754 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 27 May 2025 18:14:03 -0400 Subject: [PATCH 6/6] Add what's new entry --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a936b2825a1..9610f991014 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,6 +59,11 @@ Bug fixes and prevents round-tripping them as :py:class:`numpy.datetime64` values (:pull:`10352`). By `Spencer Clark <https://github.com/spencerkclark>`_. +Performance +~~~~~~~~~~~ +- Speed up encoding of :py:class:`cftime.datetime` objects by roughly a factor + of three (:pull:`8324`). By `Antoine Gibek <https://github.com/antscloud>`_. + Documentation ~~~~~~~~~~~~~