From 3e72df9f33de4e709bd0bd9470d149d39b65afea Mon Sep 17 00:00:00 2001 From: Phobos Date: Wed, 21 Apr 2021 18:27:34 -0400 Subject: [PATCH 01/30] dt.calendar and date_range --- xarray/coding/cftime_offsets.py | 97 +++++++++++++++++++++++++++++++++ xarray/core/accessor_dt.py | 9 +++ 2 files changed, 106 insertions(+) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index c25d5296c41..13b6dded5c4 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -47,6 +47,7 @@ from typing import ClassVar, Optional import numpy as np +import pandas as pd from ..core.pdcompat import count_not_none from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso @@ -1016,3 +1017,99 @@ def cftime_range( dates = dates[:-1] return CFTimeIndex(dates, name=name) + + +def date_range( + start=None, + end=None, + periods=None, + freq="D", + tz=None, + normalize=False, + name=None, + closed=None, + calendar="standard", + use_cftime=None, +): + """Return a fixed frequency datetime index. + + The type (CFTimeIndex or pd.DatetimeIndex) of the returned index depends + on the requested calendar and on `use_cftime`. + + Parameters + ---------- + start : str or datetime-like, optional + Left bound for generating dates. + end : str or datetime-like, optional + Right bound for generating dates. + periods : int, optional + Number of periods to generate. + freq : str or None, default: "D" + Frequency strings can have multiples, e.g. "5H". + tz : str or tzinfo, optional + Time zone name for returning localized DatetimeIndex, for example + 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is + timezone-naive. Only valid with pandas DatetimeIndex. + normalize : bool, default: False + Normalize start/end dates to midnight before generating date range. 
+ name : str, default: None + Name of the resulting index + closed : {"left", "right"} or None, default: None + Make the interval closed with respect to the given frequency to the + "left", "right", or both sides (None). + calendar : str, default: "standard" + Calendar type for the datetimes. + use_cftime : boolean, optional + If True, always return a CFTimeIndex. + If False, return a pd.DatetimeIndex if possible or raise a ValueError. + If None (default), return a pd.DatetimeIndex if possible, otherwise return a CFTimeIndex. + Defaults to False if `tz` is not None. + + Returns + ------- + CFTimeIndex or pd.DatetimeIndex + + See also + -------- + pandas.date_range + cftime_range + """ + if tz is not None: + use_cftime = False + + if ( + calendar in ["standard", "proleptic_gregorian", "gregorian"] + and use_cftime is not True + ): + try: + return pd.date_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + normalize=normalize, + name=name, + close=closed, + ) + except pd.errors.OutOfBoundsDatetime as err: + if use_cftime is False: + raise ValueError( + "Date range is invalid for pandas DatetimeIndex, try using `use_cftime=True`." + ) from err + elif use_cftime is False: + raise ValueError( + f"Invalid calendar {calendar} for pandas DatetimeIndex, try using `use_cftime=True`." 
+ ) + + return cftime_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + normalize=normalize, + name=name, + close=closed, + calendar=calendar, + ) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 1d4ef755fa0..00402c4f83b 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd +from ..coding.cftimeindex import CFTimeIndex from .common import ( _contains_datetime_like_objects, is_np_datetime_like, @@ -451,6 +452,14 @@ def weekofyear(self): "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool ) + @property + def calendar(self): + index = self._obj.variable._data.array + if isinstance(index, CFTimeIndex): + return index.calendar + # else : pd.datetimeIndex + return "standard" + class TimedeltaAccessor(Properties): """Access Timedelta fields for DataArrays with Timedelta-like dtypes. From 1c37bbd02832f4b84da842537c791bae96489d5f Mon Sep 17 00:00:00 2001 From: Phobos Date: Wed, 28 Apr 2021 15:50:13 -0400 Subject: [PATCH 02/30] Migrate calendar utils from xclim | add dt.calendar --- doc/api.rst | 3 + xarray/coding/calendar_ops.py | 283 ++++++++++++++++++++++++++++ xarray/coding/cftime_offsets.py | 148 ++++++++++++++- xarray/coding/times.py | 63 ++++++- xarray/core/accessor_dt.py | 2 +- xarray/tests/test_accessor_dt.py | 10 + xarray/tests/test_calendar_ops.py | 152 +++++++++++++++ xarray/tests/test_cftime_offsets.py | 68 +++++++ 8 files changed, 713 insertions(+), 16 deletions(-) create mode 100644 xarray/coding/calendar_ops.py create mode 100644 xarray/tests/test_calendar_ops.py diff --git a/doc/api.rst b/doc/api.rst index 6288ce01803..a798fc58a52 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -512,6 +512,7 @@ Datetimelike properties DataArray.dt.season DataArray.dt.time DataArray.dt.date + DataArray.dt.calendar DataArray.dt.is_month_start DataArray.dt.is_month_end DataArray.dt.is_quarter_end @@ -835,6 +836,8 @@ Creating 
custom indexes :toctree: generated/ cftime_range + date_range + date_range_like Faceting -------- diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py new file mode 100644 index 00000000000..b355b9be1a3 --- /dev/null +++ b/xarray/coding/calendar_ops.py @@ -0,0 +1,283 @@ +from datetime import timedelta + +import numpy as np + +from .cftime_offsets import date_range_like, get_date_type +from .times import ( + _is_numpy_compatible_time_range, + _is_numpy_datetime, + _is_standard_calendar, + cftime_to_nptime, + convert_cftimes, +) + +try: + import cftime +except ImportError: + cftime = None + + +def _days_in_year(year, calendar, use_cftime=True): + """Return the number of days in the input year according to the input calendar.""" + return ( + ( + get_date_type(calendar, use_cftime=use_cftime)(year + 1, 1, 1) + - timedelta(days=1) + ) + .timetuple() + .tm_yday + ) + + +def convert_calendar( + ds, + target, + dim="time", + align_on=None, + missing=None, + use_cftime=None, +): + """Convert the Dataset or DataArray to another calendar. + + Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + + If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. + When converting to a leap year from a non-leap year, the 29th of February is removed from the array. + In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. + + For conversions involving `360_day` calendars, see Notes. + + This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. + + Parameters + ---------- + ds : DataArray or Dataset + Input array/dataset with a time coordinate of a valid dtype (datetime64 or a cftime.datetime). + calendar : str + The target calendar name. + dim : str + Name of the time coordinate. 
+ align_on : {None, 'date', 'year'} + Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. + missing : Optional[any] + A value to use for filling in dates in the target that were missing in the source. + Default (None) is not to fill values, so the output time axis might be non-continuous. + use_cftime : boolean, optional + Whether to use cftime objects in the output, valid if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. If None (default), it uses numpy objects if the date range permits it, and cftime ones if not. + If False, it uses numpy objects or fails. + + Returns + ------- + Copy of source with the time coordinate converted to the target calendar. + If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datas with `missing`. + + Notes + ----- + If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered. + + "year" + The dates are translated according to their rank in the year (dayofyear), ignoring their original month and day information, + meaning that the missing/surplus days are added/removed at regular intervals. + + From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): + To a leap year: + January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). + To a non-leap year: + February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). 
+ + From standard calendar to a '360_day', the following dates in the source array will be dropped: + From a leap year: + January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) + From a non-leap year: + February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) + + This option is best used on daily and subdaily data. + + "date" + The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from + a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent + dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in + a standard calendar. + + This option is best used with data on a frequency coarser than daily. + """ + # In the following the calendar name "default" is an + # internal hack to mean pandas-backed standard calendar + from ..core.dataarray import DataArray + + time = ds[dim] # for convenience + + # Arguments Checks for target + if use_cftime is not True: + # Then we check is pandas is possible. + if _is_standard_calendar(target): + if _is_numpy_compatible_time_range(time): + # Conversion is possible with pandas, force False if it was None. + use_cftime = False + elif use_cftime is False: + raise ValueError( + "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." + ) + # else : Default to cftime + elif use_cftime is False: + # target calendar is ctime-only. + raise ValueError( + f"Calendar '{target}'' is only valid with cftime. Try using `use_cftime=True`." 
+ ) + else: + use_cftime = True + + # Get source + source = time.dt.calendar + + src_cal = "default" if _is_numpy_datetime(time) else source + tgt_cal = target if use_cftime else "default" + if src_cal == tgt_cal: + return ds + + if (source == "360_day" or target == "360_day") and align_on is None: + raise ValueError( + "Argument `align_on` must be specified with either 'date' or " + "'year' when converting to or from a '360_day' calendar." + ) + + if source != "360_day" and target != "360_day": + align_on = "date" + + out = ds.copy() + + if align_on == "year": + # Special case for conversion involving 360_day calendar + # Instead of translating dates directly, this tries to keep the position within a year similar. + def _yearly_interp_doy(time): + # Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar + yr = int(time.dt.year[0]) + return np.round( + _days_in_year(yr, target, use_cftime) + * time.dt.dayofyear + / _days_in_year(yr, source, use_cftime) + ).astype(int) + + def _convert_datetime(date, new_doy, calendar): + """Convert a datetime object to another calendar. + + Redefining the day of year (thus ignoring month and day information from the source datetime). + Nanosecond information are lost as cftime.datetime doesn't support them. 
+ """ + new_date = cftime.num2date( + new_doy - 1, + f"days since {date.year}-01-01", + calendar=calendar if use_cftime else "standard", + ) + try: + return get_date_type(calendar, use_cftime)( + date.year, + new_date.month, + new_date.day, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + except ValueError: + return np.nan + + new_doy = time.groupby(f"{dim}.year").map(_yearly_interp_doy) + + # Convert the source datetimes, but override the doy with our new doys + out[dim] = DataArray( + [ + _convert_datetime(date, newdoy, target) + for date, newdoy in zip(time.variable._data.array, new_doy) + ], + dims=(dim,), + name=dim, + ) + # Remove duplicate timestamps, happens when reducing the number of days + out = out.isel({dim: np.unique(out[dim], return_index=True)[1]}) + elif align_on == "date": + if use_cftime: + # Use the Index version of the 1D array + new_times = convert_cftimes( + time.variable._data.array, get_date_type(target), missing=np.NaN + ) + else: + new_times = cftime_to_nptime(time.values, raise_on_invalid=False) + out[dim] = new_times + + # Remove NaN that where put on invalid dates in target calendar + out = out.where(out[dim].notnull(), drop=True) + + if missing is not None: + time_target = date_range_like(time, calendar=target, use_cftime=use_cftime) + out = out.reindex({dim: time_target}, fill_value=missing) + + # Copy attrs but remove `calendar` if still present. + out[dim].attrs.update(time.attrs) + out[dim].attrs.pop("calendar", None) + return out + + +def _datetime_to_decimal_year(times, calendar=None): + """Convert a datetime DataArray to decimal years according to its calendar or the given one. + + Decimal years are the number of years since 0001-01-01 00:00:00 AD. + Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". 
+ """ + from ..core.dataarray import DataArray + + calendar = calendar or times.dt.calendar + + if _is_numpy_datetime(times): + times = times.copy( + data=convert_cftimes(times.values, get_date_type("standard")) + ) + + def _make_index(time): + year = int(time.dt.year[0]) + doys = cftime.date2num(times, f"days since {year:04d}-01-01", calendar=calendar) + return DataArray( + year + doys / _days_in_year(year, calendar), + dims=time.dims, + coords=time.coords, + name="time", + ) + + return times.groupby("time.year").map(_make_index) + + +def interp_calendar(source, target, dim="time"): + """Interpolates a DataArray/Dataset to another calendar based on decimal year measure. + + Each timestamp in source and target are first converted to their decimal year equivalent + then source is interpolated on the target coordinate. The decimal year is the number of + years since 0001-01-01 AD. + Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. + + This method should be used with daily data or coarser. Sub-daily result will have a modified day cycle. + + Parameters + ---------- + source: Union[DataArray, Dataset] + The source data to interpolate, must have a time coordinate of a valid dtype (np.datetime64 or cftime objects) + target: DataArray + The target time coordinate of a valid dtype (np.datetime64 or cftime objects) + dim : str + The time coordinate name. 
+ + Return + ------ + Union[DataArray, Dataset] + The source interpolated on the decimal years of target, + """ + cal_src = source[dim].dt.calendar + cal_tgt = target.dt.calendar + + out = source.copy() + out[dim] = _datetime_to_decimal_year(source[dim], calendar=cal_src).drop_vars(dim) + target_idx = _datetime_to_decimal_year(target, calendar=cal_tgt) + out = out.interp(**{dim: target_idx}) + out[dim] = target + return out diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 13b6dded5c4..06a74045fc4 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -41,7 +41,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import re -from datetime import timedelta +from datetime import datetime, timedelta from distutils.version import LooseVersion from functools import partial from typing import ClassVar, Optional @@ -51,16 +51,19 @@ from ..core.pdcompat import count_not_none from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso -from .times import format_cftime_datetime +from .times import _is_standard_calendar, format_cftime_datetime -def get_date_type(calendar): +def get_date_type(calendar, use_cftime=True): """Return the cftime date type for a given calendar name.""" try: import cftime except ImportError: raise ImportError("cftime is required for dates with non-standard calendars") else: + if _is_standard_calendar(calendar) and not use_cftime: + return pd.Timestamp + calendars = { "noleap": cftime.DatetimeNoLeap, "360_day": cftime.Datetime360Day, @@ -709,6 +712,8 @@ def to_cftime_datetime(date_str_or_date, calendar=None): return date elif isinstance(date_str_or_date, cftime.datetime): return date_str_or_date + elif isinstance(date_str_or_date, (datetime, pd.Timestamp)): + return cftime.DatetimeProlepticGregorian(*date_str_or_date.timetuple()) else: raise TypeError( "date_str_or_date must be a string or a " @@ -1074,13 +1079,12 @@ def date_range( pandas.date_range 
cftime_range """ + from .times import _is_standard_calendar + if tz is not None: use_cftime = False - if ( - calendar in ["standard", "proleptic_gregorian", "gregorian"] - and use_cftime is not True - ): + if _is_standard_calendar(calendar) and use_cftime is not True: try: return pd.date_range( start=start, @@ -1090,7 +1094,7 @@ def date_range( tz=tz, normalize=normalize, name=name, - close=closed, + closed=closed, ) except pd.errors.OutOfBoundsDatetime as err: if use_cftime is False: @@ -1107,9 +1111,133 @@ def date_range( end=end, periods=periods, freq=freq, - tz=tz, normalize=normalize, name=name, - close=closed, + closed=closed, + calendar=calendar, + ) + + +def date_range_like(source, calendar, use_cftime=None): + """Generate a datetime array with the same frequency, start and end as another one, but in a different calendar. + + Parameters + ---------- + source : DataArray or CFTimeIndex or pd.DatetimeIndex + 1D datetime array + calendar : str + New calendar name. + use_cftime : bool, optional + If True, the output uses cftime objects. If None (default), numpy objects are used if possible. + If False, numpy objects are used or an error is raised. + + Returns + ------- + DataArray + 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. + The start date is assumed to exist in the target calendar. + If the end date doesn't exist, the code tries 1 and 2 calendar days before. + Exception when the source is in 360_day and the end of the range is the 30th of a 31-days month, + then the 31st is appended to the range. + """ + from .frequencies import infer_freq + from .times import ( + _is_numpy_compatible_time_range, + _is_numpy_datetime, + _is_standard_calendar, + ) + + freq = infer_freq(source) + if freq is None: + raise ValueError( + "`date_range_like` was unable to generate a range as the source frequency was not inferrable." 
+ ) + + # Arguments Checks for target + if use_cftime is not True: + if _is_standard_calendar(calendar): + if _is_numpy_compatible_time_range(source): + # Conversion is possible with pandas, force False if it was None + use_cftime = False + elif use_cftime is False: + raise ValueError( + "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." + ) + # else : Default to cftime + elif use_cftime is False: + # target calendar is ctime-only. + raise ValueError( + f"Calendar '{calendar}' is only valid with cftime. Try using `use_cftime=True`." + ) + else: + use_cftime = True + + src_start = source.values.min() + src_end = source.values.max() + if _is_numpy_datetime(source): + src_cal = "default" + # We want to use datetime fields (datetime64 object don't have them) + src_start = pd.Timestamp(src_start) + src_end = pd.Timestamp(src_end) + else: + if isinstance(source, CFTimeIndex): + src_cal = source.calendar + else: # DataArray + src_cal = source.dt.calendar + + tgt_cal = calendar if use_cftime else "default" + if src_cal == tgt_cal: + return source + + date_type = get_date_type(calendar, use_cftime) + + def _convert_or_go_back(date): + try: + return date_type( + date.year, + date.month, + date.day, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + except ValueError: + # Day is invalid, happens at the end of months, try again the day before + try: + return date_type( + date.year, + date.month, + date.day - 1, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + except ValueError: + # Still invalid, happens for 360_day to non-leap february. Try again 2 days befordate. + return date_type( + date.year, + date.month, + date.day - 2, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + + start = _convert_or_go_back(src_start) + end = _convert_or_go_back(src_end) + + # For the cases where the source ends on the end of the month, we expect the same in the new calendar. 
+ if src_end.day == src_end.daysinmonth and isinstance( + to_offset(freq), (YearEnd, QuarterEnd, MonthEnd, Day) + ): + end = end.replace(day=end.daysinmonth) + + return date_range( + start=start.isoformat(), + end=end.isoformat(), + freq=freq, calendar=calendar, ) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 54400414ebc..e5d6c29a23e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -72,6 +72,28 @@ def _is_standard_calendar(calendar): return calendar.lower() in _STANDARD_CALENDARS +def _is_numpy_datetime(times): + return times.dtype.char in ("M", "m") + + +def _is_numpy_compatible_time_range(times): + if _is_numpy_datetime(times): + return True + # Cftime object + tmin = times.min() + tmax = times.max() + if hasattr(tmin, "item"): # it is an array, the the element. + tmin = tmin.item() + tmax = tmax.item() + try: + pd.Timestamp(*tmin.timetuple()[:2], 1) + pd.Timestamp(*tmax.timetuple()[:2], 1) + except pd.errors.OutOfBoundsDatetime: + return False + else: + return True + + def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith("s"): @@ -374,7 +396,7 @@ def infer_timedelta_units(deltas): return units -def cftime_to_nptime(times): +def cftime_to_nptime(times, raise_on_invalid=True): """Given an array of cftime.datetime objects, return an array of numpy.datetime64 objects of the same size""" times = np.asarray(times) @@ -389,14 +411,45 @@ def cftime_to_nptime(times): t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond ) except ValueError as e: - raise ValueError( - "Cannot convert date {} to a date in the " - "standard calendar. Reason: {}.".format(t, e) - ) + if raise_on_invalid: + raise ValueError( + "Cannot convert date {} to a date in the " + "standard calendar. Reason: {}.".format(t, e) + ) + else: + dt = "NaT" new[i] = np.datetime64(dt) return new +def convert_cftimes(times, date_type, missing=None): + """Given an array of datetimes, return the same dates in another cftime date type. 
+ + Useful to convert between calendars from numpy to cftime or between cftime calendars. + If missing is given, invalid dates are replaced by it, otherwise an error is raised. + """ + new = np.empty(times.shape, dtype="O") + if _is_numpy_datetime(times): + # Convert datetime64 objects to Timestamps + times = pd.DatetimeIndex(times) + for i, t in enumerate(times): + try: + dt = date_type( + t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond + ) + except ValueError as e: + if missing is None: + raise ValueError( + "Cannot convert date {} to a date in the " + "standard calendar. Reason: {}.".format(t, e) + ) + else: + dt = missing + + new[i] = dt + return new + + def _cleanup_netcdf_time_units(units): delta, ref_date = _unpack_netcdf_time_units(units) try: diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 00402c4f83b..3705410c183 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -458,7 +458,7 @@ def calendar(self): if isinstance(index, CFTimeIndex): return index.calendar # else : pd.datetimeIndex - return "standard" + return "proleptic_gregorian" class TimedeltaAccessor(Properties): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index adfa2bed33b..8c1367cedbc 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -115,6 +115,10 @@ def test_isocalendar(self, field, pandas_field): actual = self.data.time.dt.isocalendar()[field] assert_equal(expected, actual) + def test_calendar(self): + cal = self.data.time.dt.calendar + assert cal == "proleptic_gregorian" + def test_strftime(self): assert ( "2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1] @@ -425,6 +429,12 @@ def test_field_access(data, field): assert_equal(result, expected) +@requires_cftime +def test_calendar_cftime(data): + expected = data.time.values[0].calendar + assert data.time.dt.calendar == expected + + @requires_cftime def 
test_isocalendar_cftime(data): diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py new file mode 100644 index 00000000000..d8b93ada2f4 --- /dev/null +++ b/xarray/tests/test_calendar_ops.py @@ -0,0 +1,152 @@ +import numpy as np +import pytest + +from xarray import DataArray, infer_freq +from xarray.coding.calendar_ops import convert_calendar, interp_calendar +from xarray.coding.cftime_offsets import date_range + +# Maximum day of year in each calendar. +max_doy = { + "default": 366, + "standard": 366, + "gregorian": 366, + "proleptic_gregorian": 366, + "julian": 366, + "noleap": 365, + "365_day": 365, + "all_leap": 366, + "366_day": 366, + "360_day": 360, +} + + +@pytest.mark.parametrize( + "source,target,use_cftime,freq", + [ + ("standard", "noleap", None, "D"), + ("noleap", "proleptic_gregorian", True, "D"), + ("noleap", "all_leap", None, "D"), + ("all_leap", "proleptic_gregorian", False, "4H"), + ], +) +def test_convert_calendar(source, target, use_cftime, freq): + src = DataArray( + date_range("2004-01-01", "2004-12-31", freq=freq, calendar=source), + dims=("time",), + name="time", + ) + da_src = DataArray( + np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} + ) + + conv = convert_calendar(da_src, target, use_cftime=use_cftime) + + assert conv.time.dt.calendar == target + + +@pytest.mark.parametrize( + "source,target,freq", + [ + ("standard", "360_day", "D"), + ("360_day", "proleptic_gregorian", "D"), + ("proleptic_gregorian", "360_day", "4H"), + ], +) +@pytest.mark.parametrize("align_on", ["date", "year"]) +def test_convert_calendar_360_days(source, target, freq, align_on): + src = DataArray( + date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source), + dims=("time",), + name="time", + ) + da_src = DataArray( + np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} + ) + + conv = convert_calendar(da_src, target, align_on=align_on) + + assert conv.time.dt.calendar == target + + if align_on == 
"date": + np.testing.assert_array_equal( + conv.time.resample(time="M").last().dt.day, + [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], + ) + elif target == "360_day": + np.testing.assert_array_equal( + conv.time.resample(time="M").last().dt.day, + [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29], + ) + else: + np.testing.assert_array_equal( + conv.time.resample(time="M").last().dt.day, + [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31], + ) + if source == "360_day" and align_on == "year": + assert conv.size == 360 if freq == "D" else 360 * 4 + else: + assert conv.size == 359 if freq == "D" else 359 * 4 + + +@pytest.mark.parametrize( + "source,target,freq", + [ + ("standard", "noleap", "D"), + ("noleap", "proleptic_gregorian", "4H"), + ("noleap", "all_leap", "M"), + ("360_day", "noleap", "D"), + ("noleap", "360_day", "D"), + ], +) +def test_convert_calendar_missing(source, target, freq): + src = DataArray( + date_range( + "2004-01-01", + "2004-12-31" if source != "360_day" else "2004-12-30", + freq=freq, + calendar=source, + ), + dims=("time",), + name="time", + ) + da_src = DataArray( + np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} + ) + out = convert_calendar(da_src, target, missing=np.nan, align_on="date") + assert infer_freq(out.time) == freq + if source == "360_day": + assert out.time[-1].dt.day == 31 + + +@pytest.mark.parametrize( + "source,target", + [ + ("standard", "noleap"), + ("noleap", "proleptic_gregorian"), + ("standard", "360_day"), + ("360_day", "proleptic_gregorian"), + ("noleap", "all_leap"), + ("360_day", "noleap"), + ], +) +def test_interp_calendar(source, target): + src = DataArray( + date_range("2004-01-01", "2004-07-30", freq="D", calendar=source), + dims=("time",), + name="time", + ) + tgt = DataArray( + date_range("2004-01-01", "2004-07-30", freq="D", calendar=target), + dims=("time",), + name="time", + ) + da_src = DataArray( + np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} + ) + conv = 
interp_calendar(da_src, tgt) + + assert conv.size == tgt.size + assert conv.time.dt.calendar == target + + np.testing.assert_almost_equal(conv.max(), 1, 2) + assert conv.min() == 0 diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index cdac9a2d002..bbe1507cd5e 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -22,10 +22,13 @@ YearEnd, _days_in_month, cftime_range, + date_range, + date_range_like, get_date_type, to_cftime_datetime, to_offset, ) +from xarray.coding.frequencies import infer_freq cftime = pytest.importorskip("cftime") @@ -1228,3 +1231,68 @@ def test_cftime_range_standard_calendar_refers_to_gregorian(): (result,) = cftime_range("2000", periods=1) assert isinstance(result, DatetimeGregorian) + + +@pytest.mark.parametrize( + "start,calendar,use_cftime,expcf", + [ + ("1990-01-01", "standard", None, False), + ("1990-01-01", "proleptic_gregorian", True, True), + ("1990-01-01", "noleap", None, True), + ("1990-01-01", "gregorian", False, False), + ("1400-01-01", "standard", None, True), + ("3400-01-01", "standard", None, True), + ], +) +def test_date_range(start, calendar, use_cftime, expcf): + dr = date_range( + start, periods=14, freq="D", calendar=calendar, use_cftime=use_cftime + ) + + if expcf: + assert isinstance(dr, CFTimeIndex) + else: + assert isinstance(dr, pd.DatetimeIndex) + + +def test_date_range_errors(): + with pytest.raises(ValueError, match="Date range is invalid"): + date_range( + "1400-01-01", periods=1, freq="D", calendar="standard", use_cftime=False + ) + + with pytest.raises(ValueError, match="Date range is invalid"): + date_range( + "2480-01-01", + periods=1, + freq="D", + calendar="proleptic_gregorian", + use_cftime=False, + ) + + with pytest.raises(ValueError, match="Invalid calendar "): + date_range( + "1900-01-01", periods=1, freq="D", calendar="noleap", use_cftime=False + ) + + +@pytest.mark.parametrize( + "args,cal_src,cal_tgt,use_cftime,exp0", + [ + 
(("2020-02-01", None, 12, "4M"), "standard", "noleap", None, "2020-02-28"), + (("2020-02-01", None, 12, "M"), "noleap", "gregorian", None, "2020-02-29"), + (("2020-02-28", None, 12, "3H"), "all_leap", "gregorian", False, "2020-02-28"), + (("2020-03-30", None, 12, "M"), "360_day", "gregorian", False, "2020-03-31"), + (("2020-03-31", None, 12, "M"), "gregorian", "360_day", None, "2020-03-30"), + ], +) +def test_date_range_like(args, cal_src, cal_tgt, use_cftime, exp0): + start, end, periods, freq = args + source = date_range(start, end, periods, freq, calendar=cal_src) + + out = date_range_like(source, cal_tgt, use_cftime=use_cftime) + + assert len(out) == periods + assert infer_freq(out) == freq + + assert out[0].isoformat().startswith(exp0) From 39079e356899d79a2677183f124d850c9659426b Mon Sep 17 00:00:00 2001 From: Phobos Date: Wed, 28 Apr 2021 16:02:24 -0400 Subject: [PATCH 03/30] upd whats new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6acb231bcd5..b7451bb9623 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -92,6 +92,8 @@ New Features expand, ``False`` to always collapse, or ``default`` to expand unless over a pre-defined limit (:pull:`5126`). By `Tom White `_. +- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:property:`DataArray.dt.calendar`. (:pull:`5233`). + By `Pascal Bourgault `_. 
Breaking changes ~~~~~~~~~~~~~~~~ From 11d15eec3c72f6fba0e7b350f55eff09ee48209a Mon Sep 17 00:00:00 2001 From: Phobos Date: Wed, 28 Apr 2021 16:06:04 -0400 Subject: [PATCH 04/30] skip calendar tests with no cftime --- xarray/tests/test_calendar_ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index d8b93ada2f4..00a9574bb89 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -5,6 +5,9 @@ from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftime_offsets import date_range +cftime = pytest.importorskip("cftime") + + # Maximum day of year in each calendar. max_doy = { "default": 366, From d8ec02294f9c3e527f946647625117d93d7c90ea Mon Sep 17 00:00:00 2001 From: Phobos Date: Wed, 28 Apr 2021 16:36:41 -0400 Subject: [PATCH 05/30] add requires cftime 1.1.0 --- xarray/tests/test_calendar_ops.py | 3 +++ xarray/tests/test_cftime_offsets.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 00a9574bb89..c9fc80b81ea 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -5,6 +5,8 @@ from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftime_offsets import date_range +from . import requires_cftime_1_1_0 + cftime = pytest.importorskip("cftime") @@ -91,6 +93,7 @@ def test_convert_calendar_360_days(source, target, freq, align_on): assert conv.size == 359 if freq == "D" else 359 * 4 +@requires_cftime_1_1_0 @pytest.mark.parametrize( "source,target,freq", [ diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index bbe1507cd5e..8db124cb450 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -30,6 +30,8 @@ ) from xarray.coding.frequencies import infer_freq +from . 
import requires_cftime_1_1_0 + cftime = pytest.importorskip("cftime") @@ -1276,6 +1278,7 @@ def test_date_range_errors(): ) +@requires_cftime_1_1_0 @pytest.mark.parametrize( "args,cal_src,cal_tgt,use_cftime,exp0", [ From 8fe0a9496fa743331d469da4cd02239ea902d633 Mon Sep 17 00:00:00 2001 From: Phobos Date: Wed, 28 Apr 2021 17:48:22 -0400 Subject: [PATCH 06/30] import date_ranges in main --- doc/whats-new.rst | 2 +- xarray/__init__.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b7451bb9623..4681873d29c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -92,7 +92,7 @@ New Features expand, ``False`` to always collapse, or ``default`` to expand unless over a pre-defined limit (:pull:`5126`). By `Tom White `_. -- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:property:`DataArray.dt.calendar`. (:pull:`5233`). +- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). By `Pascal Bourgault `_. 
Breaking changes diff --git a/xarray/__init__.py b/xarray/__init__.py index 3886edc60e6..db7e0f183f9 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -11,7 +11,7 @@ ) from .backends.rasterio_ import open_rasterio from .backends.zarr import open_zarr -from .coding.cftime_offsets import cftime_range +from .coding.cftime_offsets import cftime_range, date_range, date_range_like from .coding.cftimeindex import CFTimeIndex from .coding.frequencies import infer_freq from .conventions import SerializationWarning, decode_cf @@ -52,6 +52,8 @@ "combine_by_coords", "combine_nested", "concat", + "date_range", + "date_range_like", "decode_cf", "dot", "cov", From f47f823b9ab529b921c9313faa94c94717452ad8 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 30 Apr 2021 10:31:31 -0400 Subject: [PATCH 07/30] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/coding/cftime_offsets.py | 2 +- xarray/coding/times.py | 2 +- xarray/tests/test_calendar_ops.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 06a74045fc4..21871f890ff 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1215,7 +1215,7 @@ def _convert_or_go_back(date): date.microsecond, ) except ValueError: - # Still invalid, happens for 360_day to non-leap february. Try again 2 days befordate. + # Still invalid, happens for 360_day to non-leap february. Try again 2 days before date. 
return date_type( date.year, date.month, diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e5d6c29a23e..ec4e6637294 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -73,7 +73,7 @@ def _is_standard_calendar(calendar): def _is_numpy_datetime(times): - return times.dtype.char in ("M", "m") + return times.dtype.kind in ("M", "m") def _is_numpy_compatible_time_range(times): diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index c9fc80b81ea..46874e7daf9 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -26,7 +26,7 @@ @pytest.mark.parametrize( - "source,target,use_cftime,freq", + "source, target, use_cftime, freq", [ ("standard", "noleap", None, "D"), ("noleap", "proleptic_gregorian", True, "D"), From c31100212c78d03e25b75038874164559ca7e697 Mon Sep 17 00:00:00 2001 From: Phobos Date: Fri, 30 Apr 2021 12:28:48 -0400 Subject: [PATCH 08/30] Add docs - use already existing is np datetime func --- doc/api-hidden.rst | 1 + xarray/coding/calendar_ops.py | 6 +++--- xarray/coding/cftime_offsets.py | 22 +++++++++++----------- xarray/coding/times.py | 10 +++------- xarray/core/accessor_dt.py | 4 ++++ 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 9d2481eed3c..b58825630e1 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -287,6 +287,7 @@ core.accessor_dt.DatetimeAccessor.floor core.accessor_dt.DatetimeAccessor.round core.accessor_dt.DatetimeAccessor.strftime + core.accessor_dt.DatetimeAccessor.calendar core.accessor_dt.DatetimeAccessor.date core.accessor_dt.DatetimeAccessor.day core.accessor_dt.DatetimeAccessor.dayofweek diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index b355b9be1a3..4d069ba8d19 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -2,10 +2,10 @@ import numpy as np +from ..core.common import is_np_datetime_like from 
.cftime_offsets import date_range_like, get_date_type from .times import ( _is_numpy_compatible_time_range, - _is_numpy_datetime, _is_standard_calendar, cftime_to_nptime, convert_cftimes, @@ -132,7 +132,7 @@ def convert_calendar( # Get source source = time.dt.calendar - src_cal = "default" if _is_numpy_datetime(time) else source + src_cal = "default" if is_np_datetime_like(time.dtype) else source tgt_cal = target if use_cftime else "default" if src_cal == tgt_cal: return ds @@ -230,7 +230,7 @@ def _datetime_to_decimal_year(times, calendar=None): calendar = calendar or times.dt.calendar - if _is_numpy_datetime(times): + if is_np_datetime_like(times.dtype): times = times.copy( data=convert_cftimes(times.values, get_date_type("standard")) ) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 21871f890ff..b25e7801718 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -49,9 +49,14 @@ import numpy as np import pandas as pd +from ..core.common import is_np_datetime_like from ..core.pdcompat import count_not_none from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso -from .times import _is_standard_calendar, format_cftime_datetime +from .times import ( + _is_numpy_compatible_time_range, + _is_standard_calendar, + format_cftime_datetime, +) def get_date_type(calendar, use_cftime=True): @@ -1135,17 +1140,12 @@ def date_range_like(source, calendar, use_cftime=None): ------- DataArray 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. - The start date is assumed to exist in the target calendar. - If the end date doesn't exist, the code tries 1 and 2 calendar days before. - Exception when the source is in 360_day and the end of the range is the 30th of a 31-days month, - then the 31st is appended to the range. + The start date is assumed to exist in the target calendar. + If the end date doesn't exist, the code tries 1 and 2 calendar days before. 
+ Exception when the source is daily or coarser, then if the end of the input range is on + the last day of the month, the output range will also end on the last day of the month in the new calendar. """ from .frequencies import infer_freq - from .times import ( - _is_numpy_compatible_time_range, - _is_numpy_datetime, - _is_standard_calendar, - ) freq = infer_freq(source) if freq is None: @@ -1174,7 +1174,7 @@ def date_range_like(source, calendar, use_cftime=None): src_start = source.values.min() src_end = source.values.max() - if _is_numpy_datetime(source): + if is_np_datetime_like(source.dtype): src_cal = "default" # We want to use datetime fields (datetime64 object don't have them) src_start = pd.Timestamp(src_start) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index ec4e6637294..cabb9e9c811 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,7 +9,7 @@ from pandas.errors import OutOfBoundsDatetime from ..core import indexing -from ..core.common import contains_cftime_datetimes +from ..core.common import contains_cftime_datetimes, is_np_datetime_like from ..core.formatting import first_n_items, format_timestamp, last_item from ..core.variable import Variable from .variables import ( @@ -72,12 +72,8 @@ def _is_standard_calendar(calendar): return calendar.lower() in _STANDARD_CALENDARS -def _is_numpy_datetime(times): - return times.dtype.kind in ("M", "m") - - def _is_numpy_compatible_time_range(times): - if _is_numpy_datetime(times): + if is_np_datetime_like(times.dtype): return True # Cftime object tmin = times.min() @@ -429,7 +425,7 @@ def convert_cftimes(times, date_type, missing=None): If missing is given, invalid dates are replaced by it, otherwise an error is raised. 
""" new = np.empty(times.shape, dtype="O") - if _is_numpy_datetime(times): + if is_np_datetime_like(times.dtype): # Convert datetime64 objects to Timestamps times = pd.DatetimeIndex(times) for i, t in enumerate(times): diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 3705410c183..7c27f158f4a 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -454,6 +454,10 @@ def weekofyear(self): @property def calendar(self): + """The name of the calendar of the dates. + + Only relevant for arrays of cftime objects, returns "proleptic_gregorian" for arrays of numpy objects. + """ index = self._obj.variable._data.array if isinstance(index, CFTimeIndex): return index.calendar From d9e174a884691182fd64fc5d0a2722f9899a8bb9 Mon Sep 17 00:00:00 2001 From: Phobos Date: Fri, 7 May 2021 11:52:04 -0400 Subject: [PATCH 09/30] update from suggestions --- doc/whats-new.rst | 5 +- xarray/coding/calendar_ops.py | 41 ++++++---------- xarray/coding/cftime_offsets.py | 41 ++-------------- xarray/coding/times.py | 76 ++++++++++++++++++++++++----- xarray/core/accessor_dt.py | 8 +-- xarray/tests/test_calendar_ops.py | 4 +- xarray/tests/test_cftime_offsets.py | 4 +- 7 files changed, 90 insertions(+), 89 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0c30fdd5001..ad34d756850 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,8 @@ v0.18.1 (unreleased) New Features ~~~~~~~~~~~~ - +- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). + By `Pascal Bourgault `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -150,8 +151,6 @@ New Features expand, ``False`` to always collapse, or ``default`` to expand unless over a pre-defined limit (:pull:`5126`). By `Tom White `_. 
-- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). - By `Pascal Bourgault `_. - Significant speedups in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp`. (:issue:`4739`, :pull:`4740`). By `Deepak Cherian `_. diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 4d069ba8d19..3c1d8ff8407 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -4,12 +4,7 @@ from ..core.common import is_np_datetime_like from .cftime_offsets import date_range_like, get_date_type -from .times import ( - _is_numpy_compatible_time_range, - _is_standard_calendar, - cftime_to_nptime, - convert_cftimes, -) +from .times import _is_numpy_compatible_time_range, _is_standard_calendar, convert_times try: import cftime @@ -31,7 +26,7 @@ def _days_in_year(year, calendar, use_cftime=True): def convert_calendar( ds, - target, + calendar, dim="time", align_on=None, missing=None, @@ -112,7 +107,7 @@ def convert_calendar( # Arguments Checks for target if use_cftime is not True: # Then we check is pandas is possible. - if _is_standard_calendar(target): + if _is_standard_calendar(calendar): if _is_numpy_compatible_time_range(time): # Conversion is possible with pandas, force False if it was None. use_cftime = False @@ -124,7 +119,7 @@ def convert_calendar( elif use_cftime is False: # target calendar is ctime-only. raise ValueError( - f"Calendar '{target}'' is only valid with cftime. Try using `use_cftime=True`." + f"Calendar '{calendar}' is only valid with cftime. Try using `use_cftime=True`." 
) else: use_cftime = True @@ -133,17 +128,17 @@ def convert_calendar( source = time.dt.calendar src_cal = "default" if is_np_datetime_like(time.dtype) else source - tgt_cal = target if use_cftime else "default" + tgt_cal = calendar if use_cftime else "default" if src_cal == tgt_cal: return ds - if (source == "360_day" or target == "360_day") and align_on is None: + if (source == "360_day" or calendar == "360_day") and align_on is None: raise ValueError( "Argument `align_on` must be specified with either 'date' or " "'year' when converting to or from a '360_day' calendar." ) - if source != "360_day" and target != "360_day": + if source != "360_day" and calendar != "360_day": align_on = "date" out = ds.copy() @@ -155,7 +150,7 @@ def _yearly_interp_doy(time): # Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar yr = int(time.dt.year[0]) return np.round( - _days_in_year(yr, target, use_cftime) + _days_in_year(yr, calendar, use_cftime) * time.dt.dayofyear / _days_in_year(yr, source, use_cftime) ).astype(int) @@ -189,7 +184,7 @@ def _convert_datetime(date, new_doy, calendar): # Convert the source datetimes, but override the doy with our new doys out[dim] = DataArray( [ - _convert_datetime(date, newdoy, target) + _convert_datetime(date, newdoy, calendar) for date, newdoy in zip(time.variable._data.array, new_doy) ], dims=(dim,), @@ -198,20 +193,18 @@ def _convert_datetime(date, new_doy, calendar): # Remove duplicate timestamps, happens when reducing the number of days out = out.isel({dim: np.unique(out[dim], return_index=True)[1]}) elif align_on == "date": - if use_cftime: - # Use the Index version of the 1D array - new_times = convert_cftimes( - time.variable._data.array, get_date_type(target), missing=np.NaN - ) - else: - new_times = cftime_to_nptime(time.values, raise_on_invalid=False) + new_times = convert_times( + time.variable._data.array, + get_date_type(calendar, use_cftime=use_cftime), + 
raise_on_invalid=False, + ) out[dim] = new_times # Remove NaN that where put on invalid dates in target calendar out = out.where(out[dim].notnull(), drop=True) if missing is not None: - time_target = date_range_like(time, calendar=target, use_cftime=use_cftime) + time_target = date_range_like(time, calendar=calendar, use_cftime=use_cftime) out = out.reindex({dim: time_target}, fill_value=missing) # Copy attrs but remove `calendar` if still present. @@ -231,9 +224,7 @@ def _datetime_to_decimal_year(times, calendar=None): calendar = calendar or times.dt.calendar if is_np_datetime_like(times.dtype): - times = times.copy( - data=convert_cftimes(times.values, get_date_type("standard")) - ) + times = times.copy(data=convert_times(times.values, get_date_type("standard"))) def _make_index(time): year = int(time.dt.year[0]) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index b25e7801718..74761ddba49 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -55,6 +55,7 @@ from .times import ( _is_numpy_compatible_time_range, _is_standard_calendar, + convert_time_or_go_back, format_cftime_datetime, ) @@ -1190,44 +1191,8 @@ def date_range_like(source, calendar, use_cftime=None): return source date_type = get_date_type(calendar, use_cftime) - - def _convert_or_go_back(date): - try: - return date_type( - date.year, - date.month, - date.day, - date.hour, - date.minute, - date.second, - date.microsecond, - ) - except ValueError: - # Day is invalid, happens at the end of months, try again the day before - try: - return date_type( - date.year, - date.month, - date.day - 1, - date.hour, - date.minute, - date.second, - date.microsecond, - ) - except ValueError: - # Still invalid, happens for 360_day to non-leap february. Try again 2 days before date. 
- return date_type( - date.year, - date.month, - date.day - 2, - date.hour, - date.minute, - date.second, - date.microsecond, - ) - - start = _convert_or_go_back(src_start) - end = _convert_or_go_back(src_end) + start = convert_time_or_go_back(src_start, date_type) + end = convert_time_or_go_back(src_end, date_type) # For the cases where the source ends on the end of the month, we expect the same in the new calendar. if src_end.day == src_end.daysinmonth and isinstance( diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cabb9e9c811..66e26f56b36 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -76,14 +76,12 @@ def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True # Cftime object + times = np.asarray(times) tmin = times.min() tmax = times.max() - if hasattr(tmin, "item"): # it is an array, the the element. - tmin = tmin.item() - tmax = tmax.item() try: - pd.Timestamp(*tmin.timetuple()[:2], 1) - pd.Timestamp(*tmax.timetuple()[:2], 1) + convert_time_or_go_back(tmin, pd.Timestamp) + convert_time_or_go_back(tmax, pd.Timestamp) except pd.errors.OutOfBoundsDatetime: return False else: @@ -394,7 +392,10 @@ def infer_timedelta_units(deltas): def cftime_to_nptime(times, raise_on_invalid=True): """Given an array of cftime.datetime objects, return an array of - numpy.datetime64 objects of the same size""" + numpy.datetime64 objects of the same size + + If raise_on_invalid is True (default), invalid dates trigger a ValueError. + Otherwise, the invalid element is replaced by np.NaT.""" times = np.asarray(times) new = np.empty(times.shape, dtype="M8[ns]") for i, t in np.ndenumerate(times): @@ -418,12 +419,16 @@ def cftime_to_nptime(times, raise_on_invalid=True): return new -def convert_cftimes(times, date_type, missing=None): - """Given an array of datetimes, return the same dates in another cftime date type. 
+def convert_times(times, date_type, raise_on_invalid=True): + """Given an array of datetimes, return the same dates in another cftime or numpy date type. + + Useful to convert between calendars in numpy and cftime or between cftime calendars. - Useful to convert between calendars from numpy to cftime or between cftime calendars. - If missing is given, invalid dates are replaced by it, otherwise an error is raised. + If raise_on_valid is True (default), invalid dates trigger a ValueError. + Otherwise, the invalid element is replaced by np.NaN for cftime types and np.NaT for np.datetime64. """ + if date_type in (pd.Timestamp, np.datetime64): + return cftime_to_nptime(times, raise_on_invalid=raise_on_invalid) new = np.empty(times.shape, dtype="O") if is_np_datetime_like(times.dtype): # Convert datetime64 objects to Timestamps @@ -434,18 +439,63 @@ def convert_cftimes(times, date_type, missing=None): t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond ) except ValueError as e: - if missing is None: + if raise_on_invalid: raise ValueError( "Cannot convert date {} to a date in the " - "standard calendar. Reason: {}.".format(t, e) + "{} calendar. Reason: {}.".format( + t, date_type(2000, 1, 1).calendar, e + ) ) else: - dt = missing + dt = np.NaN new[i] = dt return new +def convert_time_or_go_back(date, date_type): + """Convert a single date to a new date_type (cftime.datetime or pd.Timestamp). + + If the new date is invalid, it goes back a day and tries again. If it is still + invalid, goes back a second day. + + This is meant to convert end-of-month dates into a new calendar. 
+ """ + try: + return date_type( + date.year, + date.month, + date.day, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + except ValueError: + # Day is invalid, happens at the end of months, try again the day before + try: + return date_type( + date.year, + date.month, + date.day - 1, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + except ValueError: + # Still invalid, happens for 360_day to non-leap february. Try again 2 days before date. + return date_type( + date.year, + date.month, + date.day - 2, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + + def _cleanup_netcdf_time_units(units): delta, ref_date = _unpack_netcdf_time_units(units) try: diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 7c27f158f4a..a58994f174e 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from ..coding.cftimeindex import CFTimeIndex +from ..coding.times import infer_calendar_name from .common import ( _contains_datetime_like_objects, is_np_datetime_like, @@ -458,11 +458,7 @@ def calendar(self): Only relevant for arrays of cftime objects, returns "proleptic_gregorian" for arrays of numpy objects. """ - index = self._obj.variable._data.array - if isinstance(index, CFTimeIndex): - return index.calendar - # else : pd.datetimeIndex - return "proleptic_gregorian" + return infer_calendar_name(self._obj.data) class TimedeltaAccessor(Properties): diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 46874e7daf9..6dfa7bab311 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -5,7 +5,7 @@ from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftime_offsets import date_range -from . import requires_cftime_1_1_0 +from . 
import requires_cftime cftime = pytest.importorskip("cftime") @@ -93,7 +93,7 @@ def test_convert_calendar_360_days(source, target, freq, align_on): assert conv.size == 359 if freq == "D" else 359 * 4 -@requires_cftime_1_1_0 +@requires_cftime @pytest.mark.parametrize( "source,target,freq", [ diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 8db124cb450..73843f673aa 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -30,7 +30,7 @@ ) from xarray.coding.frequencies import infer_freq -from . import requires_cftime_1_1_0 +from . import requires_cftime cftime = pytest.importorskip("cftime") @@ -1278,7 +1278,7 @@ def test_date_range_errors(): ) -@requires_cftime_1_1_0 +@requires_cftime @pytest.mark.parametrize( "args,cal_src,cal_tgt,use_cftime,exp0", [ From 9d6254bab8208079b67710eb9c407391a4e87966 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 17 May 2021 12:00:48 -0400 Subject: [PATCH 10/30] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/coding/calendar_ops.py | 17 ++++++++--------- xarray/coding/cftime_offsets.py | 6 ++---- xarray/coding/times.py | 4 ++-- xarray/core/accessor_dt.py | 2 +- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 3c1d8ff8407..fe82cf8f007 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -47,7 +47,7 @@ def convert_calendar( Parameters ---------- ds : DataArray or Dataset - Input array/dataset with a time coordinate of a valid dtype (datetime64 or a cftime.datetime). + Input array/dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). calendar : str The target calendar name. dim : str @@ -58,22 +58,22 @@ def convert_calendar( A value to use for filling in dates in the target that were missing in the source. 
Default (None) is not to fill values, so the output time axis might be non-continuous. use_cftime : boolean, optional - Whether to use cftime objects in the output, valid if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. If None (default), it uses numpy objects if the date range permits it, and cftime ones if not. - If False, it uses numpy objects or fails. + Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. Returns ------- Copy of source with the time coordinate converted to the target calendar. If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datas with `missing`. + If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. Notes ----- If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered. "year" - The dates are translated according to their rank in the year (dayofyear), ignoring their original month and day information, + The dates are translated according to their relative position in the year, ignoring their original month and day information, meaning that the missing/surplus days are added/removed at regular intervals. 
From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): @@ -124,7 +124,6 @@ def convert_calendar( else: use_cftime = True - # Get source source = time.dt.calendar src_cal = "default" if is_np_datetime_like(time.dtype) else source @@ -194,7 +193,7 @@ def _convert_datetime(date, new_doy, calendar): out = out.isel({dim: np.unique(out[dim], return_index=True)[1]}) elif align_on == "date": new_times = convert_times( - time.variable._data.array, + time.data get_date_type(calendar, use_cftime=use_cftime), raise_on_invalid=False, ) @@ -252,7 +251,7 @@ def interp_calendar(source, target, dim="time"): Parameters ---------- source: Union[DataArray, Dataset] - The source data to interpolate, must have a time coordinate of a valid dtype (np.datetime64 or cftime objects) + The source data to interpolate, must have a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) target: DataArray The target time coordinate of a valid dtype (np.datetime64 or cftime objects) dim : str diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 74761ddba49..30cc338d2c3 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1134,8 +1134,8 @@ def date_range_like(source, calendar, use_cftime=None): calendar : str New calendar name. use_cftime : bool, optional - If True, the output uses cftime objects. If None (default), numpy objects are used if possible. - If False, numpy objects are used or an error is raised. + If True, the output uses :py:class`cftime.datetime` objects. If None (default), :py:class:`numpy.datetime64` values are used if possible. + If False, :py:class:`numpy.datetime64` values are used or an error is raised. Returns ------- @@ -1164,9 +1164,7 @@ def date_range_like(source, calendar, use_cftime=None): raise ValueError( "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." 
) - # else : Default to cftime elif use_cftime is False: - # target calendar is ctime-only. raise ValueError( f"Calendar '{calendar}' is only valid with cftime. Try using `use_cftime=True`." ) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 66e26f56b36..0fff1c1e1cb 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -75,7 +75,7 @@ def _is_standard_calendar(calendar): def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True - # Cftime object + # times array contains cftime objects times = np.asarray(times) tmin = times.min() tmax = times.max() @@ -431,7 +431,7 @@ def convert_times(times, date_type, raise_on_invalid=True): return cftime_to_nptime(times, raise_on_invalid=raise_on_invalid) new = np.empty(times.shape, dtype="O") if is_np_datetime_like(times.dtype): - # Convert datetime64 objects to Timestamps + # Convert datetime64 objects to Timestamps since those have year, month, day, etc. attributes times = pd.DatetimeIndex(times) for i, t in enumerate(times): try: diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index a58994f174e..63e3c485083 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -456,7 +456,7 @@ def weekofyear(self): def calendar(self): """The name of the calendar of the dates. - Only relevant for arrays of cftime objects, returns "proleptic_gregorian" for arrays of numpy objects. + Only relevant for arrays of :py:class:`cftime.datetime` objects, returns "proleptic_gregorian" for arrays of :py:class:`numpy.datetime64` values. 
""" return infer_calendar_name(self._obj.data) From 0fce9cbf77f84404d7e57c78988ff256df311ac0 Mon Sep 17 00:00:00 2001 From: Phobos Date: Mon, 17 May 2021 13:14:04 -0400 Subject: [PATCH 11/30] Modifications following review --- doc/whats-new.rst | 5 +-- xarray/coding/calendar_ops.py | 64 ++++++++++++----------------- xarray/coding/cftime_offsets.py | 36 ++++++++-------- xarray/coding/times.py | 47 +++++++++++++++++++-- xarray/tests/test_accessor_dt.py | 37 +++++++++++++++++ xarray/tests/test_cftime_offsets.py | 21 ++++------ 6 files changed, 135 insertions(+), 75 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b62e2e258c9..ba3aa20c7e1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,9 +21,6 @@ v0.18.1 (unreleased) New Features ~~~~~~~~~~~~ -- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). - By `Pascal Bourgault `_. - - Implement :py:meth:`DataArray.drop_duplicates` to remove duplicate dimension values (:pull:`5239`). By `Andrew Huang `_. @@ -35,6 +32,8 @@ New Features By `Jimmy Westling `_. - Raise more informative error when decoding time variables with invalid reference dates. (:issue:`5199`, :pull:`5288`). By `Giacomo Caria `_. +- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). + By `Pascal Bourgault `_. 
Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index fe82cf8f007..a9154e7ec27 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -1,10 +1,10 @@ -from datetime import timedelta - import numpy as np +import pandas as pd -from ..core.common import is_np_datetime_like +from ..core.common import _contains_datetime_like_objects, is_np_datetime_like from .cftime_offsets import date_range_like, get_date_type -from .times import _is_numpy_compatible_time_range, _is_standard_calendar, convert_times +from .cftimeindex import CFTimeIndex +from .times import _should_cftime_be_used, convert_times try: import cftime @@ -14,14 +14,9 @@ def _days_in_year(year, calendar, use_cftime=True): """Return the number of days in the input year according to the input calendar.""" - return ( - ( - get_date_type(calendar, use_cftime=use_cftime)(year + 1, 1, 1) - - timedelta(days=1) - ) - .timetuple() - .tm_yday - ) + date_type = get_date_type(calendar, use_cftime=use_cftime) + difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1) + return difference.days def convert_calendar( @@ -103,31 +98,15 @@ def convert_calendar( from ..core.dataarray import DataArray time = ds[dim] # for convenience + if not _contains_datetime_like_objects(time): + raise ValueError(f"Coordinate {dim} must contain datetime objects.") - # Arguments Checks for target - if use_cftime is not True: - # Then we check is pandas is possible. - if _is_standard_calendar(calendar): - if _is_numpy_compatible_time_range(time): - # Conversion is possible with pandas, force False if it was None. - use_cftime = False - elif use_cftime is False: - raise ValueError( - "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." - ) - # else : Default to cftime - elif use_cftime is False: - # target calendar is ctime-only. - raise ValueError( - f"Calendar '{calendar}' is only valid with cftime. 
Try using `use_cftime=True`." - ) - else: - use_cftime = True + use_cftime = _should_cftime_be_used(time, calendar, use_cftime) source = time.dt.calendar - src_cal = "default" if is_np_datetime_like(time.dtype) else source - tgt_cal = calendar if use_cftime else "default" + src_cal = "datetime64" if is_np_datetime_like(time.dtype) else source + tgt_cal = calendar if use_cftime else "datetime64" if src_cal == tgt_cal: return ds @@ -193,7 +172,7 @@ def _convert_datetime(date, new_doy, calendar): out = out.isel({dim: np.unique(out[dim], return_index=True)[1]}) elif align_on == "date": new_times = convert_times( - time.data + time.data, get_date_type(calendar, use_cftime=use_cftime), raise_on_invalid=False, ) @@ -227,7 +206,7 @@ def _datetime_to_decimal_year(times, calendar=None): def _make_index(time): year = int(time.dt.year[0]) - doys = cftime.date2num(times, f"days since {year:04d}-01-01", calendar=calendar) + doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar) return DataArray( year + doys / _days_in_year(year, calendar), dims=time.dims, @@ -246,13 +225,13 @@ def interp_calendar(source, target, dim="time"): years since 0001-01-01 AD. Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. - This method should be used with daily data or coarser. Sub-daily result will have a modified day cycle. + This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. Parameters ---------- source: Union[DataArray, Dataset] The source data to interpolate, must have a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) - target: DataArray + target: DataArray or DatetimeIndex or CFTimeIndex The target time coordinate of a valid dtype (np.datetime64 or cftime objects) dim : str The time coordinate name. 
@@ -262,7 +241,18 @@ def interp_calendar(source, target, dim="time"): Union[DataArray, Dataset] The source interpolated on the decimal years of target, """ + from ..core.dataarray import DataArray + + if not _contains_datetime_like_objects( + source[dim] + ) or not _contains_datetime_like_objects(target): + raise ValueError( + f"Both 'source.{dim}' and 'target' must contain datetime objects." + ) + cal_src = source[dim].dt.calendar + if isinstance(target, (pd.DatetimeIndex, CFTimeIndex)): + target = DataArray(target, dims=(dim,), name=dim) cal_tgt = target.dt.calendar out = source.copy() diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 71a24bb790a..4555aea76b3 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -49,12 +49,12 @@ import numpy as np import pandas as pd -from ..core.common import is_np_datetime_like +from ..core.common import _contains_datetime_like_objects, is_np_datetime_like from ..core.pdcompat import count_not_none from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso from .times import ( - _is_numpy_compatible_time_range, _is_standard_calendar, + _should_cftime_be_used, convert_time_or_go_back, format_cftime_datetime, ) @@ -1138,35 +1138,31 @@ def date_range_like(source, calendar, use_cftime=None): Exception when the source is daily or coarser, then if the end of the input range is on the last day of the month, the output range will also end on the last day of the month in the new calendar. """ + from ..core.dataarray import DataArray from .frequencies import infer_freq + # Source is a pd.DatetimeImdex or a CFTimeIndex or a DataArray that is 1D AND contains datetime objs. + if not isinstance(source, (pd.DatetimeIndex, CFTimeIndex)) and ( + isinstance(source, DataArray) + and (source.ndim != 1) + or not _contains_datetime_like_objects(source) + ): + raise ValueError( + "'source' must be a 1D array of datetime objects for inferring its range." 
+ ) + freq = infer_freq(source) if freq is None: raise ValueError( "`date_range_like` was unable to generate a range as the source frequency was not inferrable." ) - # Arguments Checks for target - if use_cftime is not True: - if _is_standard_calendar(calendar): - if _is_numpy_compatible_time_range(source): - # Conversion is possible with pandas, force False if it was None - use_cftime = False - elif use_cftime is False: - raise ValueError( - "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." - ) - elif use_cftime is False: - raise ValueError( - f"Calendar '{calendar}' is only valid with cftime. Try using `use_cftime=True`." - ) - else: - use_cftime = True + use_cftime = _should_cftime_be_used(source, calendar, use_cftime) src_start = source.values.min() src_end = source.values.max() if is_np_datetime_like(source.dtype): - src_cal = "default" + src_cal = "datetime64" # We want to use datetime fields (datetime64 object don't have them) src_start = pd.Timestamp(src_start) src_end = pd.Timestamp(src_end) @@ -1176,7 +1172,7 @@ def date_range_like(source, calendar, use_cftime=None): else: # DataArray src_cal = source.dt.calendar - tgt_cal = calendar if use_cftime else "default" + tgt_cal = calendar if use_cftime else "datetime64" if src_cal == tgt_cal: return source diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fc12a735e58..49ddda5f0b2 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -11,6 +11,7 @@ from ..core import indexing from ..core.common import contains_cftime_datetimes, is_np_datetime_like from ..core.formatting import first_n_items, format_timestamp, last_item +from ..core.pycompat import is_duck_dask_array from ..core.variable import Variable from .variables import ( SerializationWarning, @@ -340,10 +341,25 @@ def _infer_time_units_from_diff(unique_timedeltas): def infer_calendar_name(dates): """Given an array of datetimes, infer the CF calendar name""" - if np.asarray(dates).dtype == 
"datetime64[ns]": + if is_np_datetime_like(dates.dtype): return "proleptic_gregorian" - else: - return np.asarray(dates).ravel()[0].calendar + elif dates.dtype == np.dtype("O") and dates.size > 0: + # Logic copied from core.common.contains_cftime_datetimes. + try: + from cftime import datetime as cftime_datetime + except ImportError: + pass + else: + sample = dates.ravel()[0] + if is_duck_dask_array(sample): + sample = sample.compute() + if isinstance(sample, np.ndarray): + sample = sample.item() + if isinstance(sample, cftime_datetime): + return sample.calendar + + # Error raise if dtype is neither datetime or "O", if cftime is not importable, and if element of 'O' dtype is not cftime. + raise ValueError("Array does not contain datetime objects.") def infer_datetime_units(dates): @@ -497,6 +513,31 @@ def convert_time_or_go_back(date, date_type): ) +def _should_cftime_be_used(source, target_cal, use_cftime): + """Return whether conversion of the source to the target calendar should result in a cftime-backed array. + + Source is a 1D datetime array, target_cal a string (calendar name) and use_cftime is a boolean or None. + If use_cftime is None, this returns True if the source's range and target calendar are convertible to np.datetime64 objects. + """ + # Arguments Checks for target + if use_cftime is not True: + if _is_standard_calendar(target_cal): + if _is_numpy_compatible_time_range(source): + # Conversion is possible with pandas, force False if it was None + use_cftime = False + elif use_cftime is False: + raise ValueError( + "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." + ) + elif use_cftime is False: + raise ValueError( + f"Calendar '{target_cal}' is only valid with cftime. Try using `use_cftime=True`." 
+ ) + else: + use_cftime = True + return use_cftime + + def _cleanup_netcdf_time_units(units): delta, ref_date = _unpack_netcdf_time_units(units) try: diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 11cd7f8dd26..e44eaea5547 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -433,6 +433,43 @@ def test_calendar_cftime(data): expected = data.time.values[0].calendar assert data.time.dt.calendar == expected + # 2D np datetime: + data = xr.DataArray( + np.random.randint(1, 1000000, size=(4, 5)).astype(" Date: Mon, 17 May 2021 15:06:20 -0400 Subject: [PATCH 12/30] Add DataArray and Dataset methods --- xarray/core/dataarray.py | 111 +++++++++++++++++++++++++++++++++++++++ xarray/core/dataset.py | 110 +++++++++++++++++++++++++++++++++++++- 2 files changed, 220 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 22cb03fd11c..c9315ec950e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -21,6 +21,8 @@ import numpy as np import pandas as pd +from ..coding.calendar_ops import convert_calendar, interp_calendar +from ..coding.cftimeindex import CFTimeIndex from ..plot.plot import _PlotMethods from . import ( computation, @@ -4599,6 +4601,115 @@ def drop_duplicates( indexes = {dim: ~self.get_index(dim).duplicated(keep=keep)} return self.isel(indexes) + def convert_calendar( + self, + calendar: str, + dim: str = "time", + align_on: Optional[str] = None, + missing: Optional[Any] = None, + use_cftime: Optional[bool] = None, + ) -> "DataArray": + """Convert the DataArray to another calendar. + + Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + + If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. 
+ When converting to a leap year from a non-leap year, the 29th of February is removed from the array. + In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. + + For conversions involving `360_day` calendars, see Notes. + + This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. + + Parameters + --------- + calendar : str + The target calendar name. + dim : str + Name of the time coordinate. + align_on : {None, 'date', 'year'} + Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. + missing : Optional[any] + A value to use for filling in dates in the target that were missing in the source. + Default (None) is not to fill values, so the output time axis might be non-continuous. + use_cftime : boolean, optional + Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. + + Returns + ------- + Copy of the dataarray with the time coordinate converted to the target calendar. + If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. + + Notes + ----- + If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered. + + "year" + The dates are translated according to their relative position in the year, ignoring their original month and day information, + meaning that the missing/surplus days are added/removed at regular intervals. 
+ + From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): + To a leap year: + January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). + To a non-leap year: + February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). + + From standard calendar to a '360_day', the following dates in the source array will be dropped: + From a leap year: + January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) + From a non-leap year: + February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) + + This option is best used on daily and subdaily data. + + "date" + The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from + a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent + dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in + a standard calendar. + + This option is best used with data on a frequency coarser than daily. + """ + return convert_calendar( + self, + calendar, + dim=dim, + align_on=align_on, + missing=missing, + use_cftime=use_cftime, + ) + + def interp_calendar( + self, + target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + dim: str = "time", + ) -> "DataArray": + """Interpolates the DataArray to another calendar based on decimal year measure. + + Each timestamp in the dataset and the target are first converted to their decimal year equivalent + then source is interpolated on the target coordinate. The decimal year is the number of + years since 0001-01-01 AD. + Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. 
+ + This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. + + Parameters + ---------- + target: DataArray or DatetimeIndex or CFTimeIndex + The target time coordinate of a valid dtype (np.datetime64 or cftime objects) + dim : str + The time coordinate name. + + Return + ------ + Dataset + The source interpolated on the decimal years of target, + """ + return interp_calendar(self, target, dim=dim) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = utils.UncachedAccessor(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f59b9b6bea5..3dc516aac09 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -35,7 +35,8 @@ import xarray as xr -from ..coding.cftimeindex import _parse_array_of_cftime_strings +from ..coding.calendar_ops import convert_calendar, interp_calendar +from ..coding.cftimeindex import CFTimeIndex, _parse_array_of_cftime_strings from ..plot.dataset_plot import _Dataset_PlotMethods from . import ( alignment, @@ -7538,3 +7539,110 @@ def _wrapper(Y, *coords_, **kwargs): result.attrs = self.attrs.copy() return result + + def convert_calendar( + self, + calendar: str, + dim: str = "time", + align_on: Optional[str] = None, + missing: Optional[Any] = None, + use_cftime: Optional[bool] = None, + ) -> "Dataset": + """Convert the Dataset to another calendar. + + Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + + If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. + When converting to a leap year from a non-leap year, the 29th of February is removed from the array. 
+ In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. + + For conversions involving `360_day` calendars, see Notes. + + This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. + + Parameters + --------- + calendar : str + The target calendar name. + dim : str + Name of the time coordinate. + align_on : {None, 'date', 'year'} + Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. + missing : Optional[any] + A value to use for filling in dates in the target that were missing in the source. + Default (None) is not to fill values, so the output time axis might be non-continuous. + use_cftime : boolean, optional + Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. + + Returns + ------- + Copy of the dataset with the time coordinate converted to the target calendar. + If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. + + Notes + ----- + If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered. + + "year" + The dates are translated according to their relative position in the year, ignoring their original month and day information, + meaning that the missing/surplus days are added/removed at regular intervals. 
+ + From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): + To a leap year: + January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). + To a non-leap year: + February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). + + From standard calendar to a '360_day', the following dates in the source array will be dropped: + From a leap year: + January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) + From a non-leap year: + February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) + + This option is best used on daily and subdaily data. + + "date" + The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from + a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent + dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in + a standard calendar. + + This option is best used with data on a frequency coarser than daily. + """ + return convert_calendar( + self, + calendar, + dim=dim, + align_on=align_on, + missing=missing, + use_cftime=use_cftime, + ) + + def interp_calendar( + self, target: Union[pd.DatetimeIndex, CFTimeIndex, DataArray], dim: str = "time" + ) -> "Dataset": + """Interpolates the Dataset to another calendar based on decimal year measure. + + Each timestamp in the dataset and the target are first converted to their decimal year equivalent + then source is interpolated on the target coordinate. The decimal year is the number of + years since 0001-01-01 AD. + Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. 
+ + This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. + + Parameters + ---------- + target: DataArray or DatetimeIndex or CFTimeIndex + The target time coordinate of a valid dtype (np.datetime64 or cftime objects) + dim : str + The time coordinate name. + + Return + ------ + Dataset + The source interpolated on the decimal years of target, + """ + return interp_calendar(self, target, dim=dim) From bc7a91252f5bfca9f37db17272846dc8437bd36e Mon Sep 17 00:00:00 2001 From: Phobos Date: Mon, 17 May 2021 15:44:39 -0400 Subject: [PATCH 13/30] use proper type annotation --- xarray/core/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3dc516aac09..2b7b23eee8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7622,7 +7622,9 @@ def convert_calendar( ) def interp_calendar( - self, target: Union[pd.DatetimeIndex, CFTimeIndex, DataArray], dim: str = "time" + self, + target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + dim: str = "time", ) -> "Dataset": """Interpolates the Dataset to another calendar based on decimal year measure. 
From 5aa9732e3f8dda6f82733f425ded914d9a562961 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 13 Aug 2021 14:26:32 -0400 Subject: [PATCH 14/30] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/coding/calendar_ops.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index a9154e7ec27..9dc15b2bbb8 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -15,7 +15,10 @@ def _days_in_year(year, calendar, use_cftime=True): """Return the number of days in the input year according to the input calendar.""" date_type = get_date_type(calendar, use_cftime=use_cftime) - difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1) + if year == -1 and calendar in ["gregorian", "julian", "proleptic_gregorian"]: + difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1) + else: + difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1) return difference.days @@ -179,7 +182,7 @@ def _convert_datetime(date, new_doy, calendar): out[dim] = new_times # Remove NaN that where put on invalid dates in target calendar - out = out.where(out[dim].notnull(), drop=True) + out = out.dropna(dim) if missing is not None: time_target = date_range_like(time, calendar=calendar, use_cftime=use_cftime) @@ -194,7 +197,7 @@ def _convert_datetime(date, new_doy, calendar): def _datetime_to_decimal_year(times, calendar=None): """Convert a datetime DataArray to decimal years according to its calendar or the given one. - Decimal years are the number of years since 0001-01-01 00:00:00 AD. + The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". 
""" from ..core.dataarray import DataArray @@ -256,7 +259,7 @@ def interp_calendar(source, target, dim="time"): cal_tgt = target.dt.calendar out = source.copy() - out[dim] = _datetime_to_decimal_year(source[dim], calendar=cal_src).drop_vars(dim) + out[dim] = _datetime_to_decimal_year(source[dim], calendar=cal_src) target_idx = _datetime_to_decimal_year(target, calendar=cal_tgt) out = out.interp(**{dim: target_idx}) out[dim] = target From ca566bd4aa2a0ad46410d3ef47a337eb253b3119 Mon Sep 17 00:00:00 2001 From: Phobos Date: Fri, 13 Aug 2021 14:39:42 -0400 Subject: [PATCH 15/30] some more modifications after review --- xarray/coding/calendar_ops.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 9dc15b2bbb8..0e2f8f5a09d 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -12,6 +12,10 @@ cftime = None +# Calendar names that have no year zero by default. +_calendars_without_zero = ["gregorian", "proleptic_gregorian", "julian", "standard"] + + def _days_in_year(year, calendar, use_cftime=True): """Return the number of days in the input year according to the input calendar.""" date_type = get_date_type(calendar, use_cftime=use_cftime) @@ -113,6 +117,11 @@ def convert_calendar( if src_cal == tgt_cal: return ds + if (time.dt.year == 0).any() and tgt_cal in _calendars_without_zero: + raise ValueError( + f"Source time coordinate contains dates with year 0, which is not supported by target calendar {tgt_cal}." 
+ ) + if (source == "360_day" or calendar == "360_day") and align_on is None: raise ValueError( "Argument `align_on` must be specified with either 'date' or " @@ -194,7 +203,7 @@ def _convert_datetime(date, new_doy, calendar): return out -def _datetime_to_decimal_year(times, calendar=None): +def _datetime_to_decimal_year(times, dim="time", calendar=None): """Convert a datetime DataArray to decimal years according to its calendar or the given one. The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. @@ -212,12 +221,12 @@ def _make_index(time): doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar) return DataArray( year + doys / _days_in_year(year, calendar), - dims=time.dims, + dims=(dim,), coords=time.coords, - name="time", + name=dim, ) - return times.groupby("time.year").map(_make_index) + return times.groupby(f"{dim}.year").map(_make_index) def interp_calendar(source, target, dim="time"): @@ -258,9 +267,14 @@ def interp_calendar(source, target, dim="time"): target = DataArray(target, dims=(dim,), name=dim) cal_tgt = target.dt.calendar + if (source[dim].time.dt.year == 0).any() and cal_tgt in _calendars_without_zero: + raise ValueError( + f"Source time coordinate contains dates with year 0, which is not supported by target calendar {cal_tgt}." + ) + out = source.copy() - out[dim] = _datetime_to_decimal_year(source[dim], calendar=cal_src) - target_idx = _datetime_to_decimal_year(target, calendar=cal_tgt) + out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=cal_src) + target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=cal_tgt) out = out.interp(**{dim: target_idx}) out[dim] = target return out From a3e9fb2bcc2c015e460830aadc332d42afd757ad Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 31 Aug 2021 12:50:02 -0400 Subject: [PATCH 16/30] Apply suggestions from code review The code will break with this commit. 
Variable renaming to be done throughout all functions. Co-authored-by: Spencer Clark --- xarray/coding/calendar_ops.py | 68 ++++++++++++++++----------------- xarray/coding/cftime_offsets.py | 12 +++--- xarray/coding/times.py | 2 +- 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 0e2f8f5a09d..d911f66404a 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -12,14 +12,13 @@ cftime = None -# Calendar names that have no year zero by default. -_calendars_without_zero = ["gregorian", "proleptic_gregorian", "julian", "standard"] +_CALENDARS_WITHOUT_YEAR_ZERO = ["gregorian", "proleptic_gregorian", "julian", "standard"] def _days_in_year(year, calendar, use_cftime=True): """Return the number of days in the input year according to the input calendar.""" date_type = get_date_type(calendar, use_cftime=use_cftime) - if year == -1 and calendar in ["gregorian", "julian", "proleptic_gregorian"]: + if year == -1 and calendar in _CALENDARS_WITHOUT_YEAR_ZERO: difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1) else: difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1) @@ -34,32 +33,30 @@ def convert_calendar( missing=None, use_cftime=None, ): - """Convert the Dataset or DataArray to another calendar. + """Transform a time-indexed Dataset or DataArray to one that uses another calendar. - Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + This function only converts the individual timestamps; it does not modify any data except in dropping invalid/surplus dates, or inserting values for missing dates. If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. - When converting to a leap year from a non-leap year, the 29th of February is removed from the array. 
- In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. - - For conversions involving `360_day` calendars, see Notes. + When converting to a calendar with a leap year from to a calendar without a leap year, the 29th of February will be removed from the array. + In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. For conversions involving `360_day` calendars, see Notes. This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. Parameters ---------- - ds : DataArray or Dataset - Input array/dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). + obj : DataArray or Dataset + Input DataArray or Dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). calendar : str The target calendar name. dim : str - Name of the time coordinate. + Name of the time coordinate in the input DataArray or Dataset. align_on : {None, 'date', 'year'} - Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. - missing : Optional[any] - A value to use for filling in dates in the target that were missing in the source. + Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. + missing : any, optional + A value to use for filling in dates in the target calendar that were missing in the source's. Default (None) is not to fill values, so the output time axis might be non-continuous. - use_cftime : boolean, optional + use_cftime : bool, optional Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. If True, the new time axis uses cftime objects. 
If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. If False, it uses :py:class:`numpy.datetime64` or fails. @@ -72,19 +69,19 @@ def convert_calendar( Notes ----- - If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered. + If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. "year" The dates are translated according to their relative position in the year, ignoring their original month and day information, meaning that the missing/surplus days are added/removed at regular intervals. - From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): + From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parentheses): To a leap year: January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). To a non-leap year: February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). - From standard calendar to a '360_day', the following dates in the source array will be dropped: + From a standard calendar to a `"360_day"`, the following dates in the source array will be dropped: From a leap year: January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) From a non-leap year: @@ -94,8 +91,8 @@ def convert_calendar( "date" The month/day information is conserved and invalid dates are dropped from the output. 
This means that when converting from - a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent - dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in + a `"360_day"` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent + dates in the `"360_day"` calendar and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. @@ -104,13 +101,13 @@ def convert_calendar( # internal hack to mean pandas-backed standard calendar from ..core.dataarray import DataArray - time = ds[dim] # for convenience + time = ds[dim] if not _contains_datetime_like_objects(time): raise ValueError(f"Coordinate {dim} must contain datetime objects.") use_cftime = _should_cftime_be_used(time, calendar, use_cftime) - source = time.dt.calendar + source_calendar = time.dt.calendar src_cal = "datetime64" if is_np_datetime_like(time.dtype) else source tgt_cal = calendar if use_cftime else "datetime64" @@ -171,7 +168,7 @@ def _convert_datetime(date, new_doy, calendar): new_doy = time.groupby(f"{dim}.year").map(_yearly_interp_doy) - # Convert the source datetimes, but override the doy with our new doys + # Convert the source datetimes, but override the day of year with our new day of years. 
out[dim] = DataArray( [ _convert_datetime(date, newdoy, calendar) @@ -191,7 +188,7 @@ def _convert_datetime(date, new_doy, calendar): out[dim] = new_times # Remove NaN that where put on invalid dates in target calendar - out = out.dropna(dim) + out = out.where(out[dim].notnull(), drop=True) if missing is not None: time_target = date_range_like(time, calendar=calendar, use_cftime=use_cftime) @@ -230,27 +227,26 @@ def _make_index(time): def interp_calendar(source, target, dim="time"): - """Interpolates a DataArray/Dataset to another calendar based on decimal year measure. + """Interpolates a DataArray or Dataset indexed by a time coordinate to another calendar based on decimal year measure. - Each timestamp in source and target are first converted to their decimal year equivalent - then source is interpolated on the target coordinate. The decimal year is the number of - years since 0001-01-01 AD. - Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. + Each timestamp in `source` and `target` are first converted to their decimal year equivalent + then `source` is interpolated on the target coordinate. The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. + For example "2000-03-01 12:00" is 2000.1653 in a standard calendar or 2000.16301 in a `"noleap"` calendar. This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. 
Parameters ---------- - source: Union[DataArray, Dataset] - The source data to interpolate, must have a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) - target: DataArray or DatetimeIndex or CFTimeIndex + source: DataArray or Dataset + The source data to interpolate; must have a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) + target: DataArray, DatetimeIndex, or CFTimeIndex The target time coordinate of a valid dtype (np.datetime64 or cftime objects) dim : str The time coordinate name. Return ------ - Union[DataArray, Dataset] + DataArray or Dataset The source interpolated on the decimal years of target, """ from ..core.dataarray import DataArray @@ -262,10 +258,10 @@ def interp_calendar(source, target, dim="time"): f"Both 'source.{dim}' and 'target' must contain datetime objects." ) - cal_src = source[dim].dt.calendar + source_calendar = source[dim].dt.calendar if isinstance(target, (pd.DatetimeIndex, CFTimeIndex)): target = DataArray(target, dims=(dim,), name=dim) - cal_tgt = target.dt.calendar + target_calendar = target.dt.calendar if (source[dim].time.dt.year == 0).any() and cal_tgt in _calendars_without_zero: raise ValueError( diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 4555aea76b3..4c5d38d03ae 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1121,7 +1121,7 @@ def date_range_like(source, calendar, use_cftime=None): Parameters ---------- - source : DataArray or CFTimeIndex or pd.DatetimeIndex + source : DataArray, CFTimeIndex, or pd.DatetimeIndex 1D datetime array calendar : str New calendar name. @@ -1134,14 +1134,12 @@ def date_range_like(source, calendar, use_cftime=None): DataArray 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. The start date is assumed to exist in the target calendar. 
- If the end date doesn't exist, the code tries 1 and 2 calendar days before. - Exception when the source is daily or coarser, then if the end of the input range is on + If the end date doesn't exist, the code tries 1 and 2 calendar days before, with the exception of when the source time series is daily or coarser. In that case if the end of the input range is on the last day of the month, the output range will also end on the last day of the month in the new calendar. """ from ..core.dataarray import DataArray from .frequencies import infer_freq - # Source is a pd.DatetimeImdex or a CFTimeIndex or a DataArray that is 1D AND contains datetime objs. if not isinstance(source, (pd.DatetimeIndex, CFTimeIndex)) and ( isinstance(source, DataArray) and (source.ndim != 1) @@ -1159,10 +1157,10 @@ def date_range_like(source, calendar, use_cftime=None): use_cftime = _should_cftime_be_used(source, calendar, use_cftime) - src_start = source.values.min() - src_end = source.values.max() + source_start = source.values.min() + source_end = source.values.max() if is_np_datetime_like(source.dtype): - src_cal = "datetime64" + source_calendar = "datetime64" # We want to use datetime fields (datetime64 object don't have them) src_start = pd.Timestamp(src_start) src_end = pd.Timestamp(src_end) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 49ddda5f0b2..66b5427a3f1 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -513,7 +513,7 @@ def convert_time_or_go_back(date, date_type): ) -def _should_cftime_be_used(source, target_cal, use_cftime): +def _should_cftime_be_used(source, target_calendar, use_cftime): """Return whether conversion of the source to the target calendar should result in a cftime-backed array. Source is a 1D datetime array, target_cal a string (calendar name) and use_cftime is a boolean or None. 
From 97909f7ee0972d47fd6823dcdafb30e733039a02 Mon Sep 17 00:00:00 2001 From: Phobos Date: Tue, 31 Aug 2021 13:35:58 -0400 Subject: [PATCH 17/30] Finish applying suggestions from review --- xarray/coding/calendar_ops.py | 243 +++++++++++++++++--------------- xarray/coding/cftime_offsets.py | 19 ++- xarray/coding/times.py | 4 +- 3 files changed, 140 insertions(+), 126 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index d911f66404a..1ab4d0863cc 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -12,7 +12,12 @@ cftime = None -_CALENDARS_WITHOUT_YEAR_ZERO = ["gregorian", "proleptic_gregorian", "julian", "standard"] +_CALENDARS_WITHOUT_YEAR_ZERO = [ + "gregorian", + "proleptic_gregorian", + "julian", + "standard", +] def _days_in_year(year, calendar, use_cftime=True): @@ -26,7 +31,7 @@ def _days_in_year(year, calendar, use_cftime=True): def convert_calendar( - ds, + obj, calendar, dim="time", align_on=None, @@ -35,143 +40,113 @@ def convert_calendar( ): """Transform a time-indexed Dataset or DataArray to one that uses another calendar. - This function only converts the individual timestamps; it does not modify any data except in dropping invalid/surplus dates, or inserting values for missing dates. - - If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. - When converting to a calendar with a leap year from to a calendar without a leap year, the 29th of February will be removed from the array. - In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. For conversions involving `360_day` calendars, see Notes. - - This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. 
- - Parameters - ---------- - obj : DataArray or Dataset - Input DataArray or Dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). - calendar : str - The target calendar name. - dim : str - Name of the time coordinate in the input DataArray or Dataset. - align_on : {None, 'date', 'year'} - Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. - missing : any, optional - A value to use for filling in dates in the target calendar that were missing in the source's. - Default (None) is not to fill values, so the output time axis might be non-continuous. - use_cftime : bool, optional - Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. - If False, it uses :py:class:`numpy.datetime64` or fails. - - Returns - ------- - Copy of source with the time coordinate converted to the target calendar. - If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. - - Notes - ----- - If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. - - "year" - The dates are translated according to their relative position in the year, ignoring their original month and day information, - meaning that the missing/surplus days are added/removed at regular intervals. 
- - From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parentheses): - To a leap year: - January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). - To a non-leap year: - February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). - - From a standard calendar to a `"360_day"`, the following dates in the source array will be dropped: - From a leap year: - January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) - From a non-leap year: - February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) - - This option is best used on daily and subdaily data. - - "date" - The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from - a `"360_day"` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent - dates in the `"360_day"` calendar and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in - a standard calendar. - - This option is best used with data on a frequency coarser than daily. + This function only converts the individual timestamps; it does not modify any data except in dropping invalid/surplus dates, or inserting values for missing dates. + + If the source and target calendars are both from a standard type, only the type of the time array is modified. + When converting to a calendar with a leap year from to a calendar without a leap year, the 29th of February will be removed from the array. + In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. + For conversions involving the `360_day` calendar, see Notes. 
+ + This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. + + Parameters + ---------- + obj : DataArray or Dataset + Input DataArray or Dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). + calendar : str + The target calendar name. + dim : str + Name of the time coordinate in the input DataArray or Dataset. + align_on : {None, 'date', 'year'} + Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. + missing : any, optional + A value to use for filling in dates in the target calendar that were missing in the source's. + Default (None) is not to fill values, so the output time axis might be non-continuous. + use_cftime : bool, optional + Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. + + Returns + ------- + Copy of source with the time coordinate converted to the target calendar. + If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. + + Notes + ----- + If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. + + "year" + The dates are translated according to their relative position in the year, ignoring their original month and day information, + meaning that the missing/surplus days are added/removed at regular intervals. 
+ + From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parentheses): + To a leap year: + January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). + To a non-leap year: + February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). + + From a standard calendar to a `"360_day"`, the following dates in the source array will be dropped: + From a leap year: + January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) + From a non-leap year: + February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) + + This option is best used on daily and subdaily data. + + "date" + The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from + a `"360_day"` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent + dates in the `"360_day"` calendar and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in + a standard calendar. + + This option is best used with data on a frequency coarser than daily. 
""" - # In the following the calendar name "default" is an - # internal hack to mean pandas-backed standard calendar from ..core.dataarray import DataArray - time = ds[dim] + time = obj[dim] if not _contains_datetime_like_objects(time): raise ValueError(f"Coordinate {dim} must contain datetime objects.") use_cftime = _should_cftime_be_used(time, calendar, use_cftime) source_calendar = time.dt.calendar + # Do nothing if request calendar is the same as the source + # AND source is np XOR use_cftime + if source_calendar == calendar and is_np_datetime_like(time.dtype) ^ use_cftime: + return obj - src_cal = "datetime64" if is_np_datetime_like(time.dtype) else source - tgt_cal = calendar if use_cftime else "datetime64" - if src_cal == tgt_cal: - return ds - - if (time.dt.year == 0).any() and tgt_cal in _calendars_without_zero: + if (time.dt.year == 0).any() and calendar in _CALENDARS_WITHOUT_YEAR_ZERO: raise ValueError( - f"Source time coordinate contains dates with year 0, which is not supported by target calendar {tgt_cal}." + f"Source time coordinate contains dates with year 0, which is not supported by target calendar {calendar}." ) - if (source == "360_day" or calendar == "360_day") and align_on is None: + if (source_calendar == "360_day" or calendar == "360_day") and align_on is None: raise ValueError( "Argument `align_on` must be specified with either 'date' or " "'year' when converting to or from a '360_day' calendar." ) - if source != "360_day" and calendar != "360_day": + if source_calendar != "360_day" and calendar != "360_day": align_on = "date" - out = ds.copy() + out = obj.copy() if align_on == "year": # Special case for conversion involving 360_day calendar # Instead of translating dates directly, this tries to keep the position within a year similar. 
- def _yearly_interp_doy(time): - # Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar - yr = int(time.dt.year[0]) - return np.round( - _days_in_year(yr, calendar, use_cftime) - * time.dt.dayofyear - / _days_in_year(yr, source, use_cftime) - ).astype(int) - - def _convert_datetime(date, new_doy, calendar): - """Convert a datetime object to another calendar. - - Redefining the day of year (thus ignoring month and day information from the source datetime). - Nanosecond information are lost as cftime.datetime doesn't support them. - """ - new_date = cftime.num2date( - new_doy - 1, - f"days since {date.year}-01-01", - calendar=calendar if use_cftime else "standard", - ) - try: - return get_date_type(calendar, use_cftime)( - date.year, - new_date.month, - new_date.day, - date.hour, - date.minute, - date.second, - date.microsecond, - ) - except ValueError: - return np.nan - new_doy = time.groupby(f"{dim}.year").map(_yearly_interp_doy) + new_doy = time.groupby(f"{dim}.year").map( + _interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime + ) # Convert the source datetimes, but override the day of year with our new day of years. 
out[dim] = DataArray( [ - _convert_datetime(date, newdoy, calendar) + _convert_to_new_calendar_with_new_day_of_year( + date, newdoy, calendar, use_cftime + ) for date, newdoy in zip(time.variable._data.array, new_doy) ], dims=(dim,), @@ -200,6 +175,44 @@ def _convert_datetime(date, new_doy, calendar): return out +def _interpolate_day_of_year(time, target_calendar, use_cftime): + """Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar""" + year = int(time.dt.year[0]) + source_calendar = time.dt.calendar + return np.round( + _days_in_year(year, target_calendar, use_cftime) + * time.dt.dayofyear + / _days_in_year(year, source_calendar, use_cftime) + ).astype(int) + + +def _convert_to_new_calendar_with_new_day_of_year( + date, day_of_year, calendar, use_cftime +): + """Convert a datetime object to another calendar with a new day of year. + + Redefines the day of year (and thus ignores the month and day information from the source datetime). + Nanosecond information is lost as cftime.datetime doesn't support it. + """ + new_date = cftime.num2date( + day_of_year - 1, + f"days since {date.year}-01-01", + calendar=calendar if use_cftime else "standard", + ) + try: + return get_date_type(calendar, use_cftime)( + date.year, + new_date.month, + new_date.day, + date.hour, + date.minute, + date.second, + date.microsecond, + ) + except ValueError: + return np.nan + + def _datetime_to_decimal_year(times, dim="time", calendar=None): """Convert a datetime DataArray to decimal years according to its calendar or the given one. 
@@ -263,14 +276,16 @@ def interp_calendar(source, target, dim="time"): target = DataArray(target, dims=(dim,), name=dim) target_calendar = target.dt.calendar - if (source[dim].time.dt.year == 0).any() and cal_tgt in _calendars_without_zero: + if ( + source[dim].time.dt.year == 0 + ).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO: raise ValueError( - f"Source time coordinate contains dates with year 0, which is not supported by target calendar {cal_tgt}." + f"Source time coordinate contains dates with year 0, which is not supported by target calendar {target_calendar}." ) out = source.copy() - out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=cal_src) - target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=cal_tgt) + out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=source_calendar) + target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=target_calendar) out = out.interp(**{dim: target_idx}) out[dim] = target return out diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 4c5d38d03ae..db696fd2381 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1160,26 +1160,25 @@ def date_range_like(source, calendar, use_cftime=None): source_start = source.values.min() source_end = source.values.max() if is_np_datetime_like(source.dtype): - source_calendar = "datetime64" # We want to use datetime fields (datetime64 object don't have them) - src_start = pd.Timestamp(src_start) - src_end = pd.Timestamp(src_end) + source_calendar = "standard" + source_start = pd.Timestamp(source_start) + source_end = pd.Timestamp(source_end) else: if isinstance(source, CFTimeIndex): - src_cal = source.calendar + source_calendar = source.calendar else: # DataArray - src_cal = source.dt.calendar + source_calendar = source.dt.calendar - tgt_cal = calendar if use_cftime else "datetime64" - if src_cal == tgt_cal: + if calendar == source_calendar and 
is_np_datetime_like(source.dtype) ^ use_cftime: return source date_type = get_date_type(calendar, use_cftime) - start = convert_time_or_go_back(src_start, date_type) - end = convert_time_or_go_back(src_end, date_type) + start = convert_time_or_go_back(source_start, date_type) + end = convert_time_or_go_back(source_end, date_type) # For the cases where the source ends on the end of the month, we expect the same in the new calendar. - if src_end.day == src_end.daysinmonth and isinstance( + if source_end.day == source_end.daysinmonth and isinstance( to_offset(freq), (YearEnd, QuarterEnd, MonthEnd, Day) ): end = end.replace(day=end.daysinmonth) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 66b5427a3f1..a7d4a3f4d2d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -521,7 +521,7 @@ def _should_cftime_be_used(source, target_calendar, use_cftime): """ # Arguments Checks for target if use_cftime is not True: - if _is_standard_calendar(target_cal): + if _is_standard_calendar(target_calendar): if _is_numpy_compatible_time_range(source): # Conversion is possible with pandas, force False if it was None use_cftime = False @@ -531,7 +531,7 @@ def _should_cftime_be_used(source, target_calendar, use_cftime): ) elif use_cftime is False: raise ValueError( - f"Calendar '{target_cal}' is only valid with cftime. Try using `use_cftime=True`." + f"Calendar '{target_calendar}' is only valid with cftime. Try using `use_cftime=True`." 
) else: use_cftime = True From 507c501770975444425fc1fe52d86b4d7d05267b Mon Sep 17 00:00:00 2001 From: Phobos Date: Tue, 31 Aug 2021 14:03:58 -0400 Subject: [PATCH 18/30] Put back missing @require_cftime --- xarray/tests/test_accessor_dt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 91def20a685..04efb9c3d51 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -472,6 +472,7 @@ def test_calendar_dask_cftime() -> None: assert data.dt.calendar == "noleap" +@requires_cftime def test_isocalendar_cftime(data) -> None: with pytest.raises( From 44be4e5a0b3dde6acb1bf2b9e56f1fdceb04149d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 23 Sep 2021 11:48:16 -0400 Subject: [PATCH 19/30] Apply suggestions from code review Co-authored-by: Spencer Clark --- doc/whats-new.rst | 2 +- xarray/coding/cftime_offsets.py | 4 ++-- xarray/tests/test_accessor_dt.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1d2baac87d4..92c9803f6ca 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,7 +31,7 @@ New Features - Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`). By `Ray Bell `_, `Zachary Blackwood `_ and `Nathan Lis `_. -- Added calendar utilies :py:func:`convert_calendar`, :py:func:`interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). +- Added calendar utilities :py:func:`DataArray.convert_calendar`, :py:func:`DataArray.interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). By `Pascal Bourgault `_. 
Breaking changes diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index db696fd2381..e581e4ec118 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1036,7 +1036,7 @@ def date_range( ): """Return a fixed frequency datetime index. - The type (CFTimeIndex or pd.DatetimeIndex) of the returned index depends + The type (:py:class:`xarray.CFTimeIndex` or :py:class:`pandas.DatetimeIndex`) of the returned index depends on the requested calendar and on `use_cftime`. Parameters @@ -1126,7 +1126,7 @@ def date_range_like(source, calendar, use_cftime=None): calendar : str New calendar name. use_cftime : bool, optional - If True, the output uses :py:class`cftime.datetime` objects. If None (default), :py:class:`numpy.datetime64` values are used if possible. + If True, the output uses :py:class:`cftime.datetime` objects. If None (default), :py:class:`numpy.datetime64` values are used if possible. If False, :py:class:`numpy.datetime64` values are used or an error is raised. 
Returns diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 04efb9c3d51..9a104e4a5e7 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -459,7 +459,7 @@ def test_calendar_dask() -> None: def test_calendar_dask_cftime() -> None: from cftime import num2date - # 3D lazy dask - np + # 3D lazy dask data = xr.DataArray( num2date( np.random.randint(1, 1000000, size=(4, 5, 6)), From c4570d8233aa72bb7f12deda39cf6e6ce964c56c Mon Sep 17 00:00:00 2001 From: Phobos Date: Thu, 23 Sep 2021 13:37:29 -0400 Subject: [PATCH 20/30] Add tests - few fixes --- xarray/coding/calendar_ops.py | 5 +- xarray/coding/times.py | 12 ++++- xarray/core/dataarray.py | 2 +- xarray/tests/test_accessor_dt.py | 3 ++ xarray/tests/test_calendar_ops.py | 78 +++++++++++++++++++++++------ xarray/tests/test_cftime_offsets.py | 38 ++++++++++++++ xarray/tests/test_coding_times.py | 19 +++++++ 7 files changed, 137 insertions(+), 20 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 1ab4d0863cc..a9201442699 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -264,6 +264,9 @@ def interp_calendar(source, target, dim="time"): """ from ..core.dataarray import DataArray + if isinstance(target, (pd.DatetimeIndex, CFTimeIndex)): + target = DataArray(target, dims=(dim,), name=dim) + if not _contains_datetime_like_objects( source[dim] ) or not _contains_datetime_like_objects(target): @@ -272,8 +275,6 @@ def interp_calendar(source, target, dim="time"): ) source_calendar = source[dim].dt.calendar - if isinstance(target, (pd.DatetimeIndex, CFTimeIndex)): - target = DataArray(target, dims=(dim,), name=dim) target_calendar = target.dt.calendar if ( diff --git a/xarray/coding/times.py b/xarray/coding/times.py index a7d4a3f4d2d..62a5571886a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -85,6 +85,10 @@ def _is_numpy_compatible_time_range(times): 
convert_time_or_go_back(tmax, pd.Timestamp) except pd.errors.OutOfBoundsDatetime: return False + except ValueError as err: + if err.args[0] == "year 0 is out of range": + return False + raise else: return True @@ -444,12 +448,14 @@ def convert_times(times, date_type, raise_on_invalid=True): If raise_on_valid is True (default), invalid dates trigger a ValueError. Otherwise, the invalid element is replaced by np.NaN for cftime types and np.NaT for np.datetime64. """ - if date_type in (pd.Timestamp, np.datetime64): + if date_type in (pd.Timestamp, np.datetime64) and not is_np_datetime_like( + times.dtype + ): return cftime_to_nptime(times, raise_on_invalid=raise_on_invalid) - new = np.empty(times.shape, dtype="O") if is_np_datetime_like(times.dtype): # Convert datetime64 objects to Timestamps since those have year, month, day, etc. attributes times = pd.DatetimeIndex(times) + new = np.empty(times.shape, dtype="O") for i, t in enumerate(times): try: dt = date_type( @@ -488,6 +494,8 @@ def convert_time_or_go_back(date, date_type): date.second, date.microsecond, ) + except OutOfBoundsDatetime: + raise except ValueError: # Day is invalid, happens at the end of months, try again the day before try: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 13ef7c1ac90..3d77323a657 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4723,7 +4723,7 @@ def interp_calendar( Return ------ - Dataset + DataArray The source interpolated on the decimal years of target, """ return interp_calendar(self, target, dim=dim) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 9a104e4a5e7..12ad87a1124 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -434,6 +434,9 @@ def test_calendar_cftime(data) -> None: expected = data.time.values[0].calendar assert data.time.dt.calendar == expected + +@requires_cftime +def test_calendar_cftime_2D(data) -> None: # 2D np datetime: data = 
xr.DataArray( np.random.randint(1, 1000000, size=(4, 5)).astype(" None: # original form throughout the roundtripping process, uppercase letters and # all. assert_identical(variable, encoded) + + +@requires_cftime +def test_should_cftime_be_used_source_outside_range(): + src = cftime_range("1000-01-01", periods=100, freq="MS", calendar="noleap") + with pytest.raises( + ValueError, match="Source time range is not valid for numpy datetimes." + ): + _should_cftime_be_used(src, "standard", False) + + +@requires_cftime +def test_should_cftime_be_used_target_not_npable(): + src = cftime_range("2000-01-01", periods=100, freq="MS", calendar="noleap") + with pytest.raises( + ValueError, match="Calendar 'noleap' is only valid with cftime." + ): + _should_cftime_be_used(src, "noleap", False) From 208823080a763bdefcee630bb44ca075c531c819 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 29 Sep 2021 13:31:12 -0400 Subject: [PATCH 21/30] wrap docstrings --- xarray/coding/calendar_ops.py | 176 +++++++++++++++++++------------- xarray/coding/cftime_offsets.py | 23 +++-- xarray/coding/times.py | 8 +- xarray/core/dataarray.py | 87 ++++++++++------ xarray/core/dataset.py | 91 +++++++++++------ 5 files changed, 237 insertions(+), 148 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index a9201442699..7c951f0c5ec 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -40,68 +40,90 @@ def convert_calendar( ): """Transform a time-indexed Dataset or DataArray to one that uses another calendar. - This function only converts the individual timestamps; it does not modify any data except in dropping invalid/surplus dates, or inserting values for missing dates. - - If the source and target calendars are both from a standard type, only the type of the time array is modified. - When converting to a calendar with a leap year from to a calendar without a leap year, the 29th of February will be removed from the array. 
- In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. - For conversions involving the `360_day` calendar, see Notes. - - This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. - - Parameters - ---------- - obj : DataArray or Dataset - Input DataArray or Dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). - calendar : str - The target calendar name. - dim : str - Name of the time coordinate in the input DataArray or Dataset. - align_on : {None, 'date', 'year'} - Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. - missing : any, optional - A value to use for filling in dates in the target calendar that were missing in the source's. - Default (None) is not to fill values, so the output time axis might be non-continuous. - use_cftime : bool, optional - Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. - If False, it uses :py:class:`numpy.datetime64` or fails. - - Returns - ------- - Copy of source with the time coordinate converted to the target calendar. - If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. - - Notes - ----- - If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. 
- - "year" - The dates are translated according to their relative position in the year, ignoring their original month and day information, - meaning that the missing/surplus days are added/removed at regular intervals. - - From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parentheses): - To a leap year: - January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). - To a non-leap year: - February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). - - From a standard calendar to a `"360_day"`, the following dates in the source array will be dropped: - From a leap year: - January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) - From a non-leap year: - February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) - - This option is best used on daily and subdaily data. - - "date" - The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from - a `"360_day"` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent - dates in the `"360_day"` calendar and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in - a standard calendar. - - This option is best used with data on a frequency coarser than daily. + This function only converts the individual timestamps; it does not modify any + data except in dropping invalid/surplus dates, or inserting values for missing dates. + + If the source and target calendars are both from a standard type, only the + type of the time array is modified. When converting to a calendar with a + leap year from to a calendar without a leap year, the 29th of February will + be removed from the array. 
In the other direction the 29th of February will + be missing in the output, unless `missing` is specified, in which case that + value is inserted. For conversions involving the `360_day` calendar, see Notes. + + This method is safe to use with sub-daily data as it doesn't touch the time + part of the timestamps. + + Parameters + ---------- + obj : DataArray or Dataset + Input DataArray or Dataset with a time coordinate of a valid dtype + (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). + calendar : str + The target calendar name. + dim : str + Name of the time coordinate in the input DataArray or Dataset. + align_on : {None, 'date', 'year'} + Must be specified when either the source or target is a `"360_day"` + calendar; ignored otherwise. See Notes. + missing : any, optional + A value to use for filling in dates in the target calendar that were + missing in the source's. Default (None) is not to fill values, so the + output time axis might be non-continuous. + use_cftime : bool, optional + Whether to use cftime objects in the output, only used if `calendar` is + one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. + If None (default), it uses :py:class:`numpy.datetime64` values if the date + range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. + + Returns + ------- + Copy of source with the time coordinate converted to the target calendar. + If `missing` was None (default), invalid dates in the new calendar are + dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous + time axis, filling missing datapoints with `missing`. + + Notes + ----- + If one of the source or target calendars is `"360_day"`, `align_on` must + be specified and two options are offered. 
+ + "year" + The dates are translated according to their relative position in the year, + ignoring their original month and day information, meaning that the + missing/surplus days are added/removed at regular intervals. + + From a `360_day` to a standard calendar, the output will be missing the + following dates (day of year in parentheses): + To a leap year: + January 31st (31), March 31st (91), June 1st (153), July 31st (213), + September 31st (275) and November 30th (335). + To a non-leap year: + February 6th (36), April 19th (109), July 2nd (183), + September 12th (255), November 25th (329). + + From a standard calendar to a `"360_day"`, the following dates in the + source array will be dropped: + From a leap year: + January 31st (31), April 1st (92), June 1st (153), August 1st (214), + September 31st (275), December 1st (336) + From a non-leap year: + February 6th (37), April 20th (110), July 2nd (183), + September 13th (256), November 25th (329) + + This option is best used on daily and subdaily data. + + "date" + The month/day information is conserved and invalid dates are dropped + from the output. This means that when converting from a `"360_day"` to a + standard calendar, all 31st (Jan, March, May, July, August, October and + December) will be missing as there is no equivalent dates in the + `"360_day"` calendar and the 29th (on non-leap years) and 30th of February + will be dropped as there are no equivalent dates in a standard calendar. + + This option is best used with data on a frequency coarser than daily. """ from ..core.dataarray import DataArray @@ -176,7 +198,9 @@ def convert_calendar( def _interpolate_day_of_year(time, target_calendar, use_cftime): - """Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar""" + """Returns the nearest day in the target calendar of the corresponding + "decimal year" in the source calendar. 
+ """ year = int(time.dt.year[0]) source_calendar = time.dt.calendar return np.round( @@ -191,7 +215,8 @@ def _convert_to_new_calendar_with_new_day_of_year( ): """Convert a datetime object to another calendar with a new day of year. - Redefines the day of year (and thus ignores the month and day information from the source datetime). + Redefines the day of year (and thus ignores the month and day information + from the source datetime). Nanosecond information is lost as cftime.datetime doesn't support it. """ new_date = cftime.num2date( @@ -216,8 +241,10 @@ def _convert_to_new_calendar_with_new_day_of_year( def _datetime_to_decimal_year(times, dim="time", calendar=None): """Convert a datetime DataArray to decimal years according to its calendar or the given one. - The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. - Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". + The decimal year of a timestamp is its year plus its sub-year component + converted to the fraction of its year. + Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, + 2000.16301 in a "noleap" or 2000.16806 in a "360_day". """ from ..core.dataarray import DataArray @@ -240,18 +267,23 @@ def _make_index(time): def interp_calendar(source, target, dim="time"): - """Interpolates a DataArray or Dataset indexed by a time coordinate to another calendar based on decimal year measure. + """Interpolates a DataArray or Dataset indexed by a time coordinate to + another calendar based on decimal year measure. - Each timestamp in `source` and `target` are first converted to their decimal year equivalent - then `source` is interpolated on the target coordinate. The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. - For example "2000-03-01 12:00" is 2000.1653 in a standard calendar or 2000.16301 in a `"noleap"` calendar. 
+ Each timestamp in `source` and `target` are first converted to their decimal + year equivalent then `source` is interpolated on the target coordinate. + The decimal year of a timestamp is its year plus its sub-year component + converted to the fraction of its year. For example "2000-03-01 12:00" is + 2000.1653 in a standard calendar or 2000.16301 in a `"noleap"` calendar. - This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. + This method should only be used when the time (HH:MM:SS) information of + time coordinate is not important. Parameters ---------- source: DataArray or Dataset - The source data to interpolate; must have a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) + The source data to interpolate; must have a time coordinate of a valid + dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) target: DataArray, DatetimeIndex, or CFTimeIndex The target time coordinate of a valid dtype (np.datetime64 or cftime objects) dim : str diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 26e3d1dfa9e..8dd3d335adc 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1045,8 +1045,8 @@ def date_range( ): """Return a fixed frequency datetime index. - The type (:py:class:`xarray.CFTimeIndex` or :py:class:`pandas.DatetimeIndex`) of the returned index depends - on the requested calendar and on `use_cftime`. + The type (:py:class:`xarray.CFTimeIndex` or :py:class:`pandas.DatetimeIndex`) + of the returned index depends on the requested calendar and on `use_cftime`. Parameters ---------- @@ -1074,8 +1074,8 @@ def date_range( use_cftime : boolean, optional If True, always return a CFTimeIndex. If False, return a pd.DatetimeIndex if possible or raise a ValueError. - If None (default), return a pd.DatetimeIndex if possible, otherwise return a CFTimeIndex. 
- Defaults to False if `tz` is not None. + If None (default), return a pd.DatetimeIndex if possible, + otherwise return a CFTimeIndex. Defaults to False if `tz` is not None. Returns ------- @@ -1126,7 +1126,8 @@ def date_range( def date_range_like(source, calendar, use_cftime=None): - """Generate a datetime array with the same frequency, start and end as another one, but in a different calendar. + """Generate a datetime array with the same frequency, start and end as + another one, but in a different calendar. Parameters ---------- @@ -1135,16 +1136,20 @@ def date_range_like(source, calendar, use_cftime=None): calendar : str New calendar name. use_cftime : bool, optional - If True, the output uses :py:class:`cftime.datetime` objects. If None (default), :py:class:`numpy.datetime64` values are used if possible. + If True, the output uses :py:class:`cftime.datetime` objects. + If None (default), :py:class:`numpy.datetime64` values are used if possible. If False, :py:class:`numpy.datetime64` values are used or an error is raised. Returns ------- DataArray - 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. + 1D datetime coordinate with the same start, end and frequency as the + source, but in the new calendar. The start date is assumed to exist in the target calendar. - If the end date doesn't exist, the code tries 1 and 2 calendar days before, with the exception of when the source time series is daily or coarser. In that case if the end of the input range is on - the last day of the month, the output range will also end on the last day of the month in the new calendar. + If the end date doesn't exist, the code tries 1 and 2 calendar days + before, with the exception of when the source time series is daily or + coarser. In that case if the end of the input range is on the last day + of the month, the output range will also end on the last day of the month in the new calendar. 
""" from ..core.dataarray import DataArray from .frequencies import infer_freq diff --git a/xarray/coding/times.py b/xarray/coding/times.py index ce988ba010e..9e6b978d4af 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -527,10 +527,12 @@ def convert_time_or_go_back(date, date_type): def _should_cftime_be_used(source, target_calendar, use_cftime): - """Return whether conversion of the source to the target calendar should result in a cftime-backed array. + """Return whether conversion of the source to the target calendar should + result in a cftime-backed array. - Source is a 1D datetime array, target_cal a string (calendar name) and use_cftime is a boolean or None. - If use_cftime is None, this returns True if the source's range and target calendar are convertible to np.datetime64 objects. + Source is a 1D datetime array, target_cal a string (calendar name) and + use_cftime is a boolean or None. If use_cftime is None, this returns True + if the source's range and target calendar are convertible to np.datetime64 objects. """ # Arguments Checks for target if use_cftime is not True: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3d77323a657..7a1d495c810 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4629,15 +4629,20 @@ def convert_calendar( ) -> "DataArray": """Convert the DataArray to another calendar. - Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + Only converts the individual timestamps, does not modify any data except + in dropping invalid/surplus dates or inserting missing dates. - If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. - When converting to a leap year from a non-leap year, the 29th of February is removed from the array. 
- In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. + If the source and target calendars are either no_leap, all_leap or a + standard type, only the type of the time array is modified. + When converting to a leap year from a non-leap year, the 29th of February + is removed from the array. In the other direction the 29th of February + will be missing in the output, unless `missing` is specified, + in which case that value is inserted. For conversions involving `360_day` calendars, see Notes. - This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. + This method is safe to use with sub-daily data as it doesn't touch the + time part of the timestamps. Parameters --------- @@ -4646,48 +4651,65 @@ def convert_calendar( dim : str Name of the time coordinate. align_on : {None, 'date', 'year'} - Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. + Must be specified when either source or target is a `360_day` calendar, + ignored otherwise. See Notes. missing : Optional[any] - A value to use for filling in dates in the target that were missing in the source. - Default (None) is not to fill values, so the output time axis might be non-continuous. + A value to use for filling in dates in the target that were missing + in the source. Default (None) is not to fill values, so the output + time axis might be non-continuous. use_cftime : boolean, optional - Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. 
+ Whether to use cftime objects in the output, only used if `calendar` + is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. + If None (default), it uses :py:class:`numpy.datetime64` values if the + date range permits it, and :py:class:`cftime.datetime` objects if not. If False, it uses :py:class:`numpy.datetime64` or fails. Returns ------- Copy of the dataarray with the time coordinate converted to the target calendar. - If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. + If `missing` was None (default), invalid dates in the new calendar + are dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous + time axis, filling missing datapoints with `missing`. Notes ----- - If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered. + If one of the source or target calendars is `"360_day"`, `align_on` must + be specified and two options are offered. "year" - The dates are translated according to their relative position in the year, ignoring their original month and day information, - meaning that the missing/surplus days are added/removed at regular intervals. + The dates are translated according to their relative position in the year, + ignoring their original month and day information, meaning that the + missing/surplus days are added/removed at regular intervals. 
- From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): + From a `360_day` to a standard calendar, the output will be missing the + following dates (day of year in parentheses): To a leap year: - January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). + January 31st (31), March 31st (91), June 1st (153), July 31st (213), + September 31st (275) and November 30th (335). To a non-leap year: - February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). + February 6th (36), April 19th (109), July 2nd (183), + September 12th (255), November 25th (329). - From standard calendar to a '360_day', the following dates in the source array will be dropped: + From a standard calendar to a `"360_day"`, the following dates in the + source array will be dropped: From a leap year: - January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) + January 31st (31), April 1st (92), June 1st (153), August 1st (214), + September 31st (275), December 1st (336) From a non-leap year: - February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) + February 6th (37), April 20th (110), July 2nd (183), + September 13th (256), November 25th (329) This option is best used on daily and subdaily data. "date" - The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from - a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent - dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in - a standard calendar. + The month/day information is conserved and invalid dates are dropped + from the output. 
This means that when converting from a `"360_day"` to a + standard calendar, all 31st (Jan, March, May, July, August, October and + December) will be missing as there is no equivalent dates in the + `"360_day"` calendar and the 29th (on non-leap years) and 30th of February + will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. """ @@ -4707,17 +4729,20 @@ def interp_calendar( ) -> "DataArray": """Interpolates the DataArray to another calendar based on decimal year measure. - Each timestamp in the dataset and the target are first converted to their decimal year equivalent - then source is interpolated on the target coordinate. The decimal year is the number of - years since 0001-01-01 AD. - Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. + Each timestamp in `source` and `target` are first converted to their decimal + year equivalent then `source` is interpolated on the target coordinate. + The decimal year of a timestamp is its year plus its sub-year component + converted to the fraction of its year. For example "2000-03-01 12:00" is + 2000.1653 in a standard calendar or 2000.16301 in a `"noleap"` calendar. - This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. + This method should only be used when the time (HH:MM:SS) information of + time coordinate is not important. Parameters ---------- target: DataArray or DatetimeIndex or CFTimeIndex - The target time coordinate of a valid dtype (np.datetime64 or cftime objects) + The target time coordinate of a valid dtype + (np.datetime64 or cftime objects) dim : str The time coordinate name. 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6c99cd5d9f0..70f956a910d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7696,15 +7696,20 @@ def convert_calendar( ) -> "Dataset": """Convert the Dataset to another calendar. - Only converts the individual timestamps, does not modify any data except in dropping invalid/surplus dates or inserting missing dates. + Only converts the individual timestamps, does not modify any data except + in dropping invalid/surplus dates or inserting missing dates. - If the source and target calendars are either no_leap, all_leap or a standard type, only the type of the time array is modified. - When converting to a leap year from a non-leap year, the 29th of February is removed from the array. - In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. + If the source and target calendars are either no_leap, all_leap or a + standard type, only the type of the time array is modified. + When converting to a leap year from a non-leap year, the 29th of February + is removed from the array. In the other direction the 29th of February + will be missing in the output, unless `missing` is specified, + in which case that value is inserted. For conversions involving `360_day` calendars, see Notes. - This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. + This method is safe to use with sub-daily data as it doesn't touch the + time part of the timestamps. Parameters --------- @@ -7713,48 +7718,65 @@ def convert_calendar( dim : str Name of the time coordinate. align_on : {None, 'date', 'year'} - Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. + Must be specified when either source or target is a `360_day` calendar, + ignored otherwise. See Notes. 
missing : Optional[any] - A value to use for filling in dates in the target that were missing in the source. - Default (None) is not to fill values, so the output time axis might be non-continuous. + A value to use for filling in dates in the target that were missing + in the source. Default (None) is not to fill values, so the output + time axis might be non-continuous. use_cftime : boolean, optional - Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. + Whether to use cftime objects in the output, only used if `calendar` + is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. + If None (default), it uses :py:class:`numpy.datetime64` values if the + date range permits it, and :py:class:`cftime.datetime` objects if not. If False, it uses :py:class:`numpy.datetime64` or fails. Returns ------- - Copy of the dataset with the time coordinate converted to the target calendar. - If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous time axis, filling missing datapoints with `missing`. + Copy of the dataset with the time coordinate converted to the target calendar. + If `missing` was None (default), invalid dates in the new calendar + are dropped, but missing dates are not inserted. + If `missing` was given, the new data is reindexed to have a continuous + time axis, filling missing datapoints with `missing`. Notes ----- - If one of the source or target calendars is `360_day`, `align_on` must be specified and two options are offered.
+ If one of the source or target calendars is `"360_day"`, `align_on` must + be specified and two options are offered. "year" - The dates are translated according to their relative position in the year, ignoring their original month and day information, - meaning that the missing/surplus days are added/removed at regular intervals. + The dates are translated according to their relative position in the year, + ignoring their original month and day information, meaning that the + missing/surplus days are added/removed at regular intervals. - From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parenthesis): + From a `360_day` to a standard calendar, the output will be missing the + following dates (day of year in parentheses): To a leap year: - January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). + January 31st (31), March 31st (91), June 1st (153), July 31st (213), + September 31st (275) and November 30th (335). To a non-leap year: - February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). + February 6th (36), April 19th (109), July 2nd (183), + September 12th (255), November 25th (329). - From standard calendar to a '360_day', the following dates in the source array will be dropped: + From a standard calendar to a `"360_day"`, the following dates in the + source array will be dropped: From a leap year: - January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) + January 31st (31), April 1st (92), June 1st (153), August 1st (214), + September 31st (275), December 1st (336) From a non-leap year: - February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) + February 6th (37), April 20th (110), July 2nd (183), + September 13th (256), November 25th (329) This option is best used on daily and subdaily data. 
"date" - The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from - a `360_day` to a standard calendar, all 31st (Jan, March, May, July, August, October and December) will be missing as there is no equivalent - dates in the `360_day` and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in - a standard calendar. + The month/day information is conserved and invalid dates are dropped + from the output. This means that when converting from a `"360_day"` to a + standard calendar, all 31st (Jan, March, May, July, August, October and + December) will be missing as there is no equivalent dates in the + `"360_day"` calendar and the 29th (on non-leap years) and 30th of February + will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. """ @@ -7774,23 +7796,26 @@ def interp_calendar( ) -> "Dataset": """Interpolates the Dataset to another calendar based on decimal year measure. - Each timestamp in the dataset and the target are first converted to their decimal year equivalent - then source is interpolated on the target coordinate. The decimal year is the number of - years since 0001-01-01 AD. - Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar or 2000.16301 in a 'noleap' calendar. + Each timestamp in `source` and `target` are first converted to their decimal + year equivalent then `source` is interpolated on the target coordinate. + The decimal year of a timestamp is its year plus its sub-year component + converted to the fraction of its year. For example "2000-03-01 12:00" is + 2000.1653 in a standard calendar or 2000.16301 in a `"noleap"` calendar. - This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. + This method should only be used when the time (HH:MM:SS) information of + time coordinate is not important. 
Parameters ---------- target: DataArray or DatetimeIndex or CFTimeIndex - The target time coordinate of a valid dtype (np.datetime64 or cftime objects) + The target time coordinate of a valid dtype + (np.datetime64 or cftime objects) dim : str The time coordinate name. Return ------ - Dataset + Dataset The source interpolated on the decimal years of target, """ return interp_calendar(self, target, dim=dim) From d5b50dc240134f549bd426a176ce2e7b06f514fd Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 29 Sep 2021 13:43:54 -0400 Subject: [PATCH 22/30] Change way of importing/testing for cftime --- xarray/coding/times.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 9e6b978d4af..587e9d8612b 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -354,17 +354,13 @@ def infer_calendar_name(dates): return "proleptic_gregorian" elif dates.dtype == np.dtype("O") and dates.size > 0: # Logic copied from core.common.contains_cftime_datetimes. - try: - from cftime import datetime as cftime_datetime - except ImportError: - pass - else: + if cftime is not None: sample = dates.ravel()[0] if is_duck_dask_array(sample): sample = sample.compute() if isinstance(sample, np.ndarray): sample = sample.item() - if isinstance(sample, cftime_datetime): + if isinstance(sample, cftime.datetime): return sample.calendar # Error raise if dtype is neither datetime or "O", if cftime is not importable, and if element of 'O' dtype is not cftime.
From dc9338e305ddfeeb88e47ea2930ee6932d3deea4 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 29 Sep 2021 14:29:58 -0400 Subject: [PATCH 23/30] Upd the weather-climate doc page --- doc/user-guide/weather-climate.rst | 45 +++++++++++++----------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index e20bd510df1..504030a6a32 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -127,6 +127,23 @@ using the same formatting as the standard `datetime.strftime`_ convention . dates.strftime("%c") da["time"].dt.strftime("%Y%m%d") +Conversion between non-standard calendar and to/from pandas DatetimeIndexes is +facilitated with the :py:meth:`~xarray.DataArray.convert_calendar` method (and +similarly for datasets). Here, like elsewhere in xarray, the `use_cftime` argument +controls which datetime backend is used in the output. The default (`None`) is to +use `pandas` when the calendar is standard and dates are within 1678 and 2262. + +.. ipython:: python + + dates = xr.cftime_range(start="2001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") + da_std_cf = da.convert_calendar("standard", use_cftime=True) + +The data is unchanged, only the timestamps are modified. Further options are implemented +for the special `360_day` calendar and for handling missing dates. There is also +:py:meth:`~xarray.DataArray.interp_calendar` for interpolating data between +calendar. 
+ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - `Partial datetime string indexing`_: @@ -150,7 +167,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", - "season", "dayofyear", "dayofweek", and "days_in_month"): + "season", "dayofyear", "dayofweek", and "days_in_month") with the addition + of "calendar", absent from pandas: .. ipython:: python @@ -160,6 +178,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.dayofyear da.time.dt.dayofweek da.time.dt.days_in_month + da.time.dt.calendar - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: @@ -214,30 +233,6 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.resample(time="81T", closed="right", label="right", base=3).mean() -.. note:: - - - For some use-cases it may still be useful to convert from - a :py:class:`~xarray.CFTimeIndex` to a :py:class:`pandas.DatetimeIndex`, - despite the difference in calendar types. The recommended way of doing this - is to use the built-in :py:meth:`~xarray.CFTimeIndex.to_datetimeindex` - method: - - .. ipython:: python - :okwarning: - - modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") - da = xr.DataArray(range(24), [("time", modern_times)]) - da - datetimeindex = da.indexes["time"].to_datetimeindex() - da["time"] = datetimeindex - - However in this case one should use caution to only perform operations which - do not depend on differences between dates (e.g. differentiation, - interpolation, or upsampling with resample), as these could introduce subtle - and silent errors due to the difference in calendar types between the dates - encoded in your data and the dates stored in memory. - .. 
_Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations .. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601 .. _partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#partial-string-indexing From 822529c43c249fffb9ce4e4aa67fbe6e43dad77a Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 29 Sep 2021 14:37:25 -0400 Subject: [PATCH 24/30] fix doc examples --- doc/user-guide/weather-climate.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 504030a6a32..85a29d6bb93 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -136,8 +136,8 @@ use `pandas` when the calendar is standard and dates are within 1678 and 2262. .. ipython:: python dates = xr.cftime_range(start="2001", periods=24, freq="MS", calendar="noleap") - da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") - da_std_cf = da.convert_calendar("standard", use_cftime=True) + da_nl = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") + da_std = da.convert_calendar("standard", use_cftime=True) The data is unchanged, only the timestamps are modified. Further options are implemented for the special `360_day` calendar and for handling missing dates. 
There is also From b86de04bfaf0f43a20481b858d85c6db6bac0b3f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 30 Sep 2021 10:52:59 -0400 Subject: [PATCH 25/30] Neat docs --- doc/api.rst | 4 ++ doc/user-guide/weather-climate.rst | 14 ++--- xarray/coding/cftime_offsets.py | 27 +++++---- xarray/core/dataarray.py | 97 +++++++++++++++--------------- xarray/core/dataset.py | 97 +++++++++++++++--------------- 5 files changed, 125 insertions(+), 114 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 814e1b0801f..7928ac0a2db 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -106,6 +106,8 @@ Dataset contents Dataset.drop_dims Dataset.set_coords Dataset.reset_coords + Dataset.convert_calendar + Dataset.interp_calendar Comparisons ----------- @@ -296,6 +298,8 @@ DataArray contents DataArray.drop_duplicates DataArray.reset_coords DataArray.copy + DataArray.convert_calendar + DataArray.interp_calendar **ndarray methods**: :py:attr:`~DataArray.astype` diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 85a29d6bb93..893e7b50429 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -128,10 +128,10 @@ using the same formatting as the standard `datetime.strftime`_ convention . da["time"].dt.strftime("%Y%m%d") Conversion between non-standard calendar and to/from pandas DatetimeIndexes is -facilitated with the :py:meth:`~xarray.DataArray.convert_calendar` method (and -similarly for datasets). Here, like elsewhere in xarray, the `use_cftime` argument -controls which datetime backend is used in the output. The default (`None`) is to -use `pandas` when the calendar is standard and dates are within 1678 and 2262. +facilitated with the :py:meth:`xarray.Dataset.convert_calendar` method (also available as +:py:meth:`xarray.DataArray.convert_calendar`). Here, like elsewhere in xarray, the ``use_cftime`` +argument controls which datetime backend is used in the output. 
The default (``None``) is to +use `pandas` when possible, i.e. when the calendar is standard and dates are within 1678 and 2262. .. ipython:: python @@ -140,9 +140,9 @@ use `pandas` when the calendar is standard and dates are within 1678 and 2262. da_std = da.convert_calendar("standard", use_cftime=True) The data is unchanged, only the timestamps are modified. Further options are implemented -for the special `360_day` calendar and for handling missing dates. There is also -:py:meth:`~xarray.DataArray.interp_calendar` for interpolating data between -calendar. +for the special ``"360_day"`` calendar and for handling missing dates. There is also +:py:meth:`xarray.Dataset.interp_calendar` (and :py:meth:`xarray.DataArray.interp_calendar`) +for `interpolating` data between calendars. For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 8dd3d335adc..f853a85aa07 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1075,7 +1075,7 @@ def date_range( If True, always return a CFTimeIndex. If False, return a pd.DatetimeIndex if possible or raise a ValueError. If None (default), return a pd.DatetimeIndex if possible, - otherwise return a CFTimeIndex. Defaults to False if `tz` is not None. + otherwise return a CFTimeIndex. Defaults to False if `tz` is not None. Returns ------- @@ -1085,6 +1085,7 @@ def date_range( -------- pandas.date_range cftime_range + date_range_like """ from .times import _is_standard_calendar @@ -1132,24 +1133,24 @@ def date_range_like(source, calendar, use_cftime=None): Parameters ---------- source : DataArray, CFTimeIndex, or pd.DatetimeIndex - 1D datetime array + 1D datetime array calendar : str - New calendar name. + New calendar name. use_cftime : bool, optional - If True, the output uses :py:class:`cftime.datetime` objects. - If None (default), :py:class:`numpy.datetime64` values are used if possible. 
- If False, :py:class:`numpy.datetime64` values are used or an error is raised. + If True, the output uses :py:class:`cftime.datetime` objects. + If None (default), :py:class:`numpy.datetime64` values are used if possible. + If False, :py:class:`numpy.datetime64` values are used or an error is raised. Returns ------- DataArray - 1D datetime coordinate with the same start, end and frequency as the - source, but in the new calendar. - The start date is assumed to exist in the target calendar. - If the end date doesn't exist, the code tries 1 and 2 calendar days - before, with the exception of when the source time series is daily or - coarser. In that case if the end of the input range is on the last day - of the month, the output range will also end on the last day of the month in the new calendar. + 1D datetime coordinate with the same start, end and frequency as the + source, but in the new calendar. The start date is assumed to exist in + the target calendar. If the end date doesn't exist, the code tries 1 + and 2 calendar days before. There is a special case when the source time + series is daily or coarser and the end of the input range is on the + last day of the month. Then the output range will also end on the last + day of the month in the new calendar. """ from ..core.dataarray import DataArray from .frequencies import infer_freq diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7a1d495c810..345dc84aba4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4647,71 +4647,74 @@ def convert_calendar( Parameters --------- calendar : str - The target calendar name. + The target calendar name. dim : str - Name of the time coordinate. + Name of the time coordinate. align_on : {None, 'date', 'year'} - Must be specified when either source or target is a `360_day` calendar, - ignored otherwise. See Notes. + Must be specified when either source or target is a `360_day` calendar, + ignored otherwise. See Notes. 
missing : Optional[any] - A value to use for filling in dates in the target that were missing - in the source. Default (None) is not to fill values, so the output - time axis might be non-continuous. + A value to use for filling in dates in the target that were missing + in the source. Default (None) is not to fill values, so the output + time axis might be non-continuous. use_cftime : boolean, optional - Whether to use cftime objects in the output, only used if `calendar` - is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. - If None (default), it uses :py:class:`numpy.datetime64` values if the - date range permits it, and :py:class:`cftime.datetime` objects if not. - If False, it uses :py:class:`numpy.datetime64` or fails. + Whether to use cftime objects in the output, only used if `calendar` + is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. + If None (default), it uses :py:class:`numpy.datetime64` values if the + date range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. Returns ------- - Copy of the dataarray with the time coordinate converted to the target calendar. - If `missing` was None (default), invalid dates in the new calendar - are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous - time axis, filling missing datapoints with `missing`. + DataArray + Copy of the dataarray with the time coordinate converted to the + target calendar. If 'missing' was None (default), invalid dates in + the new calendar are dropped, but missing dates are not inserted. + If 'missing' was given, the new data is reindexed to have a continuous + time axis, filling missing datapoints the passed value. 
Notes ----- If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. - "year" - The dates are translated according to their relative position in the year, - ignoring their original month and day information, meaning that the - missing/surplus days are added/removed at regular intervals. + - "year" + The dates are translated according to their relative position in the year, + ignoring their original month and day information, meaning that the + missing/surplus days are added/removed at regular intervals. + + From a `360_day` to a standard calendar, the output will be missing the + following dates (day of year in parentheses): - From a `360_day` to a standard calendar, the output will be missing the - following dates (day of year in parentheses): To a leap year: - January 31st (31), March 31st (91), June 1st (153), July 31st (213), - September 31st (275) and November 30th (335). + January 31st (31), March 31st (91), June 1st (153), July 31st (213), + September 31st (275) and November 30th (335). To a non-leap year: - February 6th (36), April 19th (109), July 2nd (183), - September 12th (255), November 25th (329). + February 6th (36), April 19th (109), July 2nd (183), + September 12th (255), November 25th (329). 
+ + From a standard calendar to a `"360_day"`, the following dates in the + source array will be dropped: - From a standard calendar to a `"360_day"`, the following dates in the - source array will be dropped: From a leap year: - January 31st (31), April 1st (92), June 1st (153), August 1st (214), - September 31st (275), December 1st (336) + January 31st (31), April 1st (92), June 1st (153), August 1st (214), + September 31st (275), December 1st (336) From a non-leap year: - February 6th (37), April 20th (110), July 2nd (183), - September 13th (256), November 25th (329) + February 6th (37), April 20th (110), July 2nd (183), + September 13th (256), November 25th (329) - This option is best used on daily and subdaily data. + This option is best used on daily and subdaily data. - "date" - The month/day information is conserved and invalid dates are dropped - from the output. This means that when converting from a `"360_day"` to a - standard calendar, all 31st (Jan, March, May, July, August, October and - December) will be missing as there is no equivalent dates in the - `"360_day"` calendar and the 29th (on non-leap years) and 30th of February - will be dropped as there are no equivalent dates in a standard calendar. + - "date" + The month/day information is conserved and invalid dates are dropped + from the output. This means that when converting from a `"360_day"` to a + standard calendar, all 31st (Jan, March, May, July, August, October and + December) will be missing as there is no equivalent dates in the + `"360_day"` calendar and the 29th (on non-leap years) and 30th of February + will be dropped as there are no equivalent dates in a standard calendar. - This option is best used with data on a frequency coarser than daily. + This option is best used with data on a frequency coarser than daily. 
""" return convert_calendar( self, @@ -4741,15 +4744,15 @@ def interp_calendar( Parameters ---------- target: DataArray or DatetimeIndex or CFTimeIndex - The target time coordinate of a valid dtype - (np.datetime64 or cftime objects) + The target time coordinate of a valid dtype + (np.datetime64 or cftime objects) dim : str - The time coordinate name. + The time coordinate name. Return ------ DataArray - The source interpolated on the decimal years of target, + The source interpolated on the decimal years of target, """ return interp_calendar(self, target, dim=dim) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 70f956a910d..516d0ec4d64 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7714,71 +7714,74 @@ def convert_calendar( Parameters --------- calendar : str - The target calendar name. + The target calendar name. dim : str - Name of the time coordinate. + Name of the time coordinate. align_on : {None, 'date', 'year'} - Must be specified when either source or target is a `360_day` calendar, - ignored otherwise. See Notes. + Must be specified when either source or target is a `360_day` calendar, + ignored otherwise. See Notes. missing : Optional[any] - A value to use for filling in dates in the target that were missing - in the source. Default (None) is not to fill values, so the output - time axis might be non-continuous. + A value to use for filling in dates in the target that were missing + in the source. Default (None) is not to fill values, so the output + time axis might be non-continuous. use_cftime : boolean, optional - Whether to use cftime objects in the output, only used if `calendar` - is one of {"proleptic_gregorian", "gregorian" or "standard"}. - If True, the new time axis uses cftime objects. - If None (default), it uses :py:class:`numpy.datetime64` values if the - date range permits it, and :py:class:`cftime.datetime` objects if not. - If False, it uses :py:class:`numpy.datetime64` or fails. 
+ Whether to use cftime objects in the output, only used if `calendar` + is one of {"proleptic_gregorian", "gregorian" or "standard"}. + If True, the new time axis uses cftime objects. + If None (default), it uses :py:class:`numpy.datetime64` values if the + date range permits it, and :py:class:`cftime.datetime` objects if not. + If False, it uses :py:class:`numpy.datetime64` or fails. Returns ------- - Copy of the dataarray with the time coordinate converted to the target calendar. - If `missing` was None (default), invalid dates in the new calendar - are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous - time axis, filling missing datapoints with `missing`. + Dataset + Copy of the dataarray with the time coordinate converted to the + target calendar. If 'missing' was None (default), invalid dates in + the new calendar are dropped, but missing dates are not inserted. + If 'missing' was given, the new data is reindexed to have a continuous + time axis, filling missing datapoints the passed value. Notes ----- If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. - "year" - The dates are translated according to their relative position in the year, - ignoring their original month and day information, meaning that the - missing/surplus days are added/removed at regular intervals. + - "year" + The dates are translated according to their relative position in the year, + ignoring their original month and day information, meaning that the + missing/surplus days are added/removed at regular intervals. 
+ + From a `360_day` to a standard calendar, the output will be missing the + following dates (day of year in parentheses): - From a `360_day` to a standard calendar, the output will be missing the - following dates (day of year in parentheses): To a leap year: - January 31st (31), March 31st (91), June 1st (153), July 31st (213), - September 31st (275) and November 30th (335). + January 31st (31), March 31st (91), June 1st (153), July 31st (213), + September 31st (275) and November 30th (335). To a non-leap year: - February 6th (36), April 19th (109), July 2nd (183), - September 12th (255), November 25th (329). + February 6th (36), April 19th (109), July 2nd (183), + September 12th (255), November 25th (329). + + From a standard calendar to a `"360_day"`, the following dates in the + source array will be dropped: - From a standard calendar to a `"360_day"`, the following dates in the - source array will be dropped: From a leap year: - January 31st (31), April 1st (92), June 1st (153), August 1st (214), - September 31st (275), December 1st (336) + January 31st (31), April 1st (92), June 1st (153), August 1st (214), + September 31st (275), December 1st (336) From a non-leap year: - February 6th (37), April 20th (110), July 2nd (183), - September 13th (256), November 25th (329) + February 6th (37), April 20th (110), July 2nd (183), + September 13th (256), November 25th (329) - This option is best used on daily and subdaily data. + This option is best used on daily and subdaily data. - "date" - The month/day information is conserved and invalid dates are dropped - from the output. This means that when converting from a `"360_day"` to a - standard calendar, all 31st (Jan, March, May, July, August, October and - December) will be missing as there is no equivalent dates in the - `"360_day"` calendar and the 29th (on non-leap years) and 30th of February - will be dropped as there are no equivalent dates in a standard calendar. 
+ - "date" + The month/day information is conserved and invalid dates are dropped + from the output. This means that when converting from a `"360_day"` to a + standard calendar, all 31st (Jan, March, May, July, August, October and + December) will be missing as there is no equivalent dates in the + `"360_day"` calendar and the 29th (on non-leap years) and 30th of February + will be dropped as there are no equivalent dates in a standard calendar. - This option is best used with data on a frequency coarser than daily. + This option is best used with data on a frequency coarser than daily. """ return convert_calendar( self, @@ -7808,14 +7811,14 @@ def interp_calendar( Parameters ---------- target: DataArray or DatetimeIndex or CFTimeIndex - The target time coordinate of a valid dtype - (np.datetime64 or cftime objects) + The target time coordinate of a valid dtype + (np.datetime64 or cftime objects) dim : str - The time coordinate name. + The time coordinate name. Return ------ DataArray - The source interpolated on the decimal years of target, + The source interpolated on the decimal years of target, """ return interp_calendar(self, target, dim=dim) From d7efe8e9577e6e6278ade245511c2a7bd0cb0c65 Mon Sep 17 00:00:00 2001 From: Phobos Date: Tue, 12 Oct 2021 13:54:05 -0400 Subject: [PATCH 26/30] fix in tests after review --- xarray/core/accessor_dt.py | 3 +- xarray/tests/test_calendar_ops.py | 48 ++++++++++++++++++++++++++--- xarray/tests/test_cftime_offsets.py | 25 +++++++++------ 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index b82b07ecdfe..a9504c4c117 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -454,7 +454,8 @@ def weekofyear(self): def calendar(self): """The name of the calendar of the dates. - Only relevant for arrays of :py:class:`cftime.datetime` objects, returns "proleptic_gregorian" for arrays of :py:class:`numpy.datetime64` values. 
+ Only relevant for arrays of :py:class:`cftime.datetime` objects, + returns "proleptic_gregorian" for arrays of :py:class:`numpy.datetime64` values. """ return infer_calendar_name(self._obj.data) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 59ea52f2db2..8d1ddcf4689 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -4,6 +4,7 @@ from xarray import DataArray, infer_freq from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftime_offsets import date_range +from xarray.testing import assert_identical from . import requires_cftime @@ -33,6 +34,32 @@ def test_convert_calendar(source, target, use_cftime, freq): assert conv.time.dt.calendar == target + if source != "noleap": + expected_times = date_range( + "2004-01-01", + "2004-12-31", + freq=freq, + use_cftime=use_cftime, + calendar=target, + ) + else: + expected_times_pre_leap = date_range( + "2004-01-01", + "2004-02-28", + freq=freq, + use_cftime=use_cftime, + calendar=target, + ) + expected_times_post_leap = date_range( + "2004-03-01", + "2004-12-31", + freq=freq, + use_cftime=use_cftime, + calendar=target, + ) + expected_times = expected_times_pre_leap.append(expected_times_post_leap) + np.testing.assert_array_equal(conv.time, expected_times) + @pytest.mark.parametrize( "source,target,freq", @@ -105,8 +132,22 @@ def test_convert_calendar_missing(source, target, freq): ) out = convert_calendar(da_src, target, missing=np.nan, align_on="date") assert infer_freq(out.time) == freq - if source == "360_day": - assert out.time[-1].dt.day == 31 + + expected = date_range( + "2004-01-01", + "2004-12-31" if target != "360_day" else "2004-12-30", + freq=freq, + calendar=target, + ) + np.testing.assert_array_equal(out.time, expected) + + if freq != "M": + out_without_missing = convert_calendar(da_src, target, align_on="date") + expected_nan = out.isel(time=~out.time.isin(out_without_missing.time)) + assert 
expected_nan.isnull().all() + + expected_not_nan = out.sel(time=out_without_missing.time) + assert_identical(expected_not_nan, out_without_missing) @requires_cftime @@ -174,8 +215,7 @@ def test_interp_calendar(source, target): ) conv = interp_calendar(da_src, tgt) - assert conv.size == tgt.size - assert conv.time.dt.calendar == target + assert_identical(tgt.time, conv.time) np.testing.assert_almost_equal(conv.max(), 1, 2) assert conv.min() == 0 diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index a13f34ddd39..061c1420aba 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1266,26 +1266,31 @@ def test_date_range_errors(): @requires_cftime @pytest.mark.parametrize( - "args,cal_src,cal_tgt,use_cftime,exp0", + "start,freq,cal_src,cal_tgt,use_cftime,exp0,exp_pd", [ - (("2020-02-01", None, 12, "4M"), "standard", "noleap", None, "2020-02-28"), - (("2020-02-01", None, 12, "M"), "noleap", "gregorian", None, "2020-02-29"), - (("2020-02-28", None, 12, "3H"), "all_leap", "gregorian", False, "2020-02-28"), - (("2020-03-30", None, 12, "M"), "360_day", "gregorian", False, "2020-03-31"), - (("2020-03-31", None, 12, "M"), "gregorian", "360_day", None, "2020-03-30"), + ("2020-02-01", "4M", "standard", "noleap", None, "2020-02-28", False), + ("2020-02-01", "M", "noleap", "gregorian", True, "2020-02-29", True), + ("2020-02-28", "3H", "all_leap", "gregorian", False, "2020-02-28", True), + ("2020-03-30", "M", "360_day", "gregorian", False, "2020-03-31", True), + ("2020-03-31", "M", "gregorian", "360_day", None, "2020-03-30", False), ], ) -def test_date_range_like(args, cal_src, cal_tgt, use_cftime, exp0): - start, end, periods, freq = args - source = date_range(start, end, periods, freq, calendar=cal_src) +def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd): + source = date_range(start, periods=12, freq=freq, calendar=cal_src) out = date_range_like(source, cal_tgt, 
use_cftime=use_cftime) - assert len(out) == periods + assert len(out) == 12 assert infer_freq(out) == freq assert out[0].isoformat().startswith(exp0) + if exp_pd: + assert isinstance(out, pd.DatetimeIndex) + else: + assert isinstance(out, CFTimeIndex) + assert out.calendar == cal_tgt + def test_date_range_like_same_calendar(): src = date_range("2000-01-01", periods=12, freq="6H", use_cftime=False) From 44303502b10ea39eb5937973062eaf060a7aae60 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 18 Oct 2021 14:42:53 -0400 Subject: [PATCH 27/30] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/coding/calendar_ops.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 7c951f0c5ec..efdfbda258b 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -66,9 +66,16 @@ def convert_calendar( Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. missing : any, optional - A value to use for filling in dates in the target calendar that were - missing in the source's. Default (None) is not to fill values, so the - output time axis might be non-continuous. + By default, i.e. if the value is None, this method will simply attempt + to convert the dates in the source calendar to the same dates in the + target calendar, and drop any of those that are not possible to + represent. If a value is provided, a new time coordinate will be + created in the target calendar with the same frequency as the original + time coordinate; for any dates that are not present in the source, the + data will be filled with this value. Note that using this mode requires + that the source data have an inferable frequency; for more information + see :py:func:`xarray.infer_freq`. For certain frequency, source, and + target calendar combinations, this could result in many missing values. 
use_cftime : bool, optional Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. @@ -82,8 +89,9 @@ def convert_calendar( Copy of source with the time coordinate converted to the target calendar. If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If `missing` was given, the new data is reindexed to have a continuous - time axis, filling missing datapoints with `missing`. + If `missing` was given, the new data is reindexed to have a time axis + with the same frequency as the source, but in the new calendar; any + missing datapoints are filled with `missing`. Notes ----- From 790be229aa9f27bc7adfe0dbdb678360894c53d4 Mon Sep 17 00:00:00 2001 From: Phobos Date: Mon, 18 Oct 2021 14:54:58 -0400 Subject: [PATCH 28/30] Better explain missing in notes - copy changes to obj methods --- xarray/coding/calendar_ops.py | 11 ++++++++++- xarray/core/dataarray.py | 27 ++++++++++++++++++++++----- xarray/core/dataset.py | 27 ++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index efdfbda258b..79477d2c9ed 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -75,7 +75,7 @@ def convert_calendar( data will be filled with this value. Note that using this mode requires that the source data have an inferable frequency; for more information see :py:func:`xarray.infer_freq`. For certain frequency, source, and - target calendar combinations, this could result in many missing values. + target calendar combinations, this could result in many missing values, see notes. use_cftime : bool, optional Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. 
@@ -95,6 +95,15 @@ def convert_calendar( Notes ----- + Passing a value to `missing` is only usable if the source's time coordinate has an + inferable frequency (see :py:func:`~xarray.infer_freq`) and is only appropriate + if the target coordinate, generated from this frequency, has dates equivalent to the + source. It is usually **not** appropriate to use this mode with: + + - Period-end frequencies : 'A', 'Y', 'Q' or 'M', in opposition to 'AS' 'YS', 'QS' and 'MS' + - Sub-monthly frequencies that do not divide a day evenly : 'W', 'nD' where `N != 1` + or 'mH' where 24 % m != 0). + If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 345dc84aba4..31ca850b1d8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4654,9 +4654,16 @@ def convert_calendar( Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. missing : Optional[any] - A value to use for filling in dates in the target that were missing - in the source. Default (None) is not to fill values, so the output - time axis might be non-continuous. + By default, i.e. if the value is None, this method will simply attempt + to convert the dates in the source calendar to the same dates in the + target calendar, and drop any of those that are not possible to + represent. If a value is provided, a new time coordinate will be + created in the target calendar with the same frequency as the original + time coordinate; for any dates that are not present in the source, the + data will be filled with this value. Note that using this mode requires + that the source data have an inferable frequency; for more information + see :py:func:`xarray.infer_freq`. For certain frequency, source, and + target calendar combinations, this could result in many missing values, see notes.
use_cftime : boolean, optional Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. @@ -4671,11 +4678,21 @@ def convert_calendar( Copy of the dataarray with the time coordinate converted to the target calendar. If 'missing' was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If 'missing' was given, the new data is reindexed to have a continuous - time axis, filling missing datapoints the passed value. + If `missing` was given, the new data is reindexed to have a time axis + with the same frequency as the source, but in the new calendar; any + missing datapoints are filled with `missing`. Notes ----- + Passing a value to `missing` is only usable if the source's time coordinate has an + inferable frequency (see :py:func:`~xarray.infer_freq`) and is only appropriate + if the target coordinate, generated from this frequency, has dates equivalent to the + source. It is usually **not** appropriate to use this mode with: + + - Period-end frequencies: 'A', 'Y', 'Q' or 'M', as opposed to 'AS', 'YS', 'QS' and 'MS' + - Sub-monthly frequencies that do not divide a day evenly: 'W', 'nD' where `n != 1` + or 'mH' where 24 % m != 0. + If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 516d0ec4d64..fbfce16a2cb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7721,9 +7721,16 @@ def convert_calendar( Must be specified when either source or target is a `360_day` calendar, ignored otherwise. See Notes. missing : Optional[any] - A value to use for filling in dates in the target that were missing - in the source. Default (None) is not to fill values, so the output - time axis might be non-continuous. + By default, i.e.
if the value is None, this method will simply attempt + to convert the dates in the source calendar to the same dates in the + target calendar, and drop any of those that are not possible to + represent. If a value is provided, a new time coordinate will be + created in the target calendar with the same frequency as the original + time coordinate; for any dates that are not present in the source, the + data will be filled with this value. Note that using this mode requires + that the source data have an inferable frequency; for more information + see :py:func:`xarray.infer_freq`. For certain frequency, source, and + target calendar combinations, this could result in many missing values, see notes. use_cftime : boolean, optional Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. @@ -7738,11 +7745,21 @@ def convert_calendar( Copy of the dataarray with the time coordinate converted to the target calendar. If 'missing' was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. - If 'missing' was given, the new data is reindexed to have a continuous - time axis, filling missing datapoints the passed value. + If `missing` was given, the new data is reindexed to have a time axis + with the same frequency as the source, but in the new calendar; any + missing datapoints are filled with `missing`. Notes ----- + Passing a value to `missing` is only usable if the source's time coordinate has an + inferable frequency (see :py:func:`~xarray.infer_freq`) and is only appropriate + if the target coordinate, generated from this frequency, has dates equivalent to the + source. It is usually **not** appropriate to use this mode with: + + - Period-end frequencies: 'A', 'Y', 'Q' or 'M', as opposed to 'AS', 'YS', 'QS' and 'MS' + - Sub-monthly frequencies that do not divide a day evenly: 'W', 'nD' where `n != 1` + or 'mH' where 24 % m != 0. 
+ If one of the source or target calendars is `"360_day"`, `align_on` must be specified and two options are offered. From b6f53a8c1c3d2f0f12a818996ed46de6bd1bdf92 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 26 Oct 2021 16:02:42 -0400 Subject: [PATCH 29/30] Apply suggestions from code review Co-authored-by: Spencer Clark --- doc/whats-new.rst | 2 +- xarray/coding/calendar_ops.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cc895741e20..41aa09e904a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,7 +34,7 @@ New Features - Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`). By `Ray Bell `_, `Zachary Blackwood `_ and `Nathan Lis `_. -- Added calendar utilities :py:func:`DataArray.convert_calendar`, :py:func:`DataArray.interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar`. (:pull:`5233`). +- Added calendar utilities :py:func:`DataArray.convert_calendar`, :py:func:`DataArray.interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar` (:issue:`5155`, :pull:`5233`). By `Pascal Bourgault `_. - Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`). By `Maxime Liquet `_. diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 79477d2c9ed..7b973c8d7ab 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -100,8 +100,8 @@ def convert_calendar( if the target coordinate, generated from this frequency, has dates equivalent to the source. 
It is usually **not** appropriate to use this mode with: - - Period-end frequencies : 'A', 'Y', 'Q' or 'M', in opposition to 'AS' 'YS', 'QS' and 'MS' - - Sub-monthly frequencies that do not divide a day evenly : 'W', 'nD' where `N != 1` + - Period-end frequencies: 'A', 'Y', 'Q' or 'M', in opposition to 'AS' 'YS', 'QS' and 'MS' + - Sub-monthly frequencies that do not divide a day evenly: 'W', 'nD' where `n != 1` or 'mH' where 24 % m != 0). If one of the source or target calendars is `"360_day"`, `align_on` must @@ -135,7 +135,7 @@ def convert_calendar( "date" The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from a `"360_day"` to a - standard calendar, all 31st (Jan, March, May, July, August, October and + standard calendar, all 31sts (Jan, March, May, July, August, October and December) will be missing as there is no equivalent dates in the `"360_day"` calendar and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in a standard calendar. From 2c023a4f11b86c08131fc8facc750195b8f0632c Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 13 Dec 2021 15:12:24 -0500 Subject: [PATCH 30/30] Remove unused import --- xarray/coding/cftime_offsets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index f58241edbc5..2db6d4e8097 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -42,7 +42,6 @@ import re from datetime import datetime, timedelta -from distutils.version import LooseVersion from functools import partial from typing import ClassVar, Optional