Skip to content

Commit 07430e1

Browse files
Disallow standard calendar datetime64 encoding prior to reform (#10352)
In #9618 we allowed encoding `np.datetime64` values prior to 1582-10-15 using a `"standard"` or `"gregorian"` calendar through cftime. While technically possible, this implicitly introduces a calendar change and means the values can no longer be round-tripped as `np.datetime64`—xarray will choose `cftime.DatetimeGregorian` instances when decoding instead. I am not sure how often this will come up, and the behavior may not be the user's intent. This PR switches to raising a `ValueError` in this circumstance and recommends encoding with a `"proleptic_gregorian"` calendar instead (the calendar that xarray automatically chooses for `np.datetime64` values when given no user input).
1 parent e18336f commit 07430e1

File tree

3 files changed

+36
-32
lines changed

3 files changed

+36
-32
lines changed

doc/whats-new.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ New Features
3333
Breaking changes
3434
~~~~~~~~~~~~~~~~
3535

36-
3736
Deprecations
3837
~~~~~~~~~~~~
3938

@@ -51,6 +50,14 @@ Bug fixes
5150
calculating mean in rolling for correct operations (preserve float dtypes,
5251
correct mean of bool arrays) (:issue:`10340`, :pull:`10341`).
5352
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
53+
- Raise an error when attempting to encode :py:class:`numpy.datetime64` values
54+
prior to the Gregorian calendar reform date of 1582-10-15 with a
55+
``"standard"`` or ``"gregorian"`` calendar. Previously we would warn and
56+
encode these as :py:class:`cftime.DatetimeGregorian` objects, but it is not
57+
clear that this is the user's intent, since this implicitly converts the
58+
calendar of the datetimes from ``"proleptic_gregorian"`` to ``"gregorian"``
59+
and prevents round-tripping them as :py:class:`numpy.datetime64` values
60+
(:pull:`10352`). By `Spencer Clark <https://github.com/spencerkclark>`_.
5461

5562
Documentation
5663
~~~~~~~~~~~~~

xarray/coding/times.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,7 @@ def _eagerly_encode_cf_datetime(
10651065
calendar = infer_calendar_name(dates)
10661066

10671067
raise_incompatible_units_error = False
1068+
raise_gregorian_proleptic_gregorian_mismatch_error = False
10681069
try:
10691070
if not _is_standard_calendar(calendar) or dates.dtype.kind == "O":
10701071
# parse with cftime instead
@@ -1073,16 +1074,7 @@ def _eagerly_encode_cf_datetime(
10731074
if calendar in ["standard", "gregorian"] and np.nanmin(dates).astype(
10741075
"=M8[us]"
10751076
).astype(datetime) < datetime(1582, 10, 15):
1076-
# if we use standard calendar and for dates before the reform
1077-
# we need to use cftime instead
1078-
emit_user_level_warning(
1079-
f"Unable to encode numpy.datetime64 objects with {calendar} calendar."
1080-
"Using cftime.datetime objects instead, reason: dates prior "
1081-
"reform date (1582-10-15). To silence this warning transform "
1082-
"numpy.datetime64 to corresponding cftime.datetime beforehand.",
1083-
SerializationWarning,
1084-
)
1085-
raise OutOfBoundsDatetime
1077+
raise_gregorian_proleptic_gregorian_mismatch_error = True
10861078

10871079
time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
10881080
# calendar equivalence only for days after the reform
@@ -1166,6 +1158,16 @@ def _eagerly_encode_cf_datetime(
11661158
f"units {units!r}. Consider setting encoding['units'] to {new_units!r} to "
11671159
f"serialize with an integer dtype."
11681160
)
1161+
if raise_gregorian_proleptic_gregorian_mismatch_error:
1162+
raise ValueError(
1163+
f"Unable to encode np.datetime64 values with {calendar} "
1164+
f"calendar, because some or all values are prior to the reform "
1165+
f"date of 1582-10-15. To encode these times, set "
1166+
f"encoding['calendar'] to 'proleptic_gregorian' instead, which "
1167+
f"is the true calendar that np.datetime64 values use. The "
1168+
f"'standard' or 'gregorian' calendar is only equivalent to the "
1169+
f"'proleptic_gregorian' calendar after the reform date."
1170+
)
11691171

11701172
return num, units, calendar
11711173

xarray/tests/test_coding_times.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,6 @@ def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None:
239239
def test_decode_dates_outside_timestamp_range(
240240
calendar, time_unit: PDDatetimeUnitOptions
241241
) -> None:
242-
from datetime import datetime
243-
244242
import cftime
245243

246244
units = "days since 0001-01-01"
@@ -379,8 +377,6 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
379377
def test_decode_multidim_time_outside_timestamp_range(
380378
calendar, time_unit: PDDatetimeUnitOptions
381379
) -> None:
382-
from datetime import datetime
383-
384380
import cftime
385381

386382
units = "days since 0001-01-01"
@@ -1163,27 +1159,26 @@ def test__encode_datetime_with_cftime() -> None:
11631159

11641160

11651161
@requires_cftime
1166-
def test_encode_decode_cf_datetime_outofbounds_warnings(
1162+
def test_round_trip_standard_calendar_cftime_datetimes_pre_reform() -> None:
1163+
from cftime import DatetimeGregorian
1164+
1165+
dates = np.array([DatetimeGregorian(1, 1, 1), DatetimeGregorian(2000, 1, 1)])
1166+
encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard")
1167+
with pytest.warns(SerializationWarning, match="Unable to decode time axis"):
1168+
decoded = decode_cf_datetime(*encoded)
1169+
np.testing.assert_equal(decoded, dates)
1170+
1171+
1172+
@pytest.mark.parametrize("calendar", ["standard", "gregorian"])
1173+
def test_encode_cf_datetime_gregorian_proleptic_gregorian_mismatch_error(
1174+
calendar: str,
11671175
time_unit: PDDatetimeUnitOptions,
11681176
) -> None:
1169-
import cftime
1170-
11711177
if time_unit == "ns":
1172-
pytest.skip("does not work work out of bounds datetimes")
1178+
pytest.skip("datetime64[ns] values can only be defined post reform")
11731179
dates = np.array(["0001-01-01", "2001-01-01"], dtype=f"datetime64[{time_unit}]")
1174-
cfdates = np.array(
1175-
[
1176-
cftime.datetime(t0.year, t0.month, t0.day, calendar="gregorian")
1177-
for t0 in dates.astype(datetime)
1178-
]
1179-
)
1180-
with pytest.warns(
1181-
SerializationWarning, match="Unable to encode numpy.datetime64 objects"
1182-
):
1183-
encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard")
1184-
with pytest.warns(SerializationWarning, match="Unable to decode time axis"):
1185-
decoded = decode_cf_datetime(*encoded)
1186-
np.testing.assert_equal(decoded, cfdates)
1180+
with pytest.raises(ValueError, match="proleptic_gregorian"):
1181+
encode_cf_datetime(dates, "seconds since 2000-01-01", calendar)
11871182

11881183

11891184
@pytest.mark.parametrize("calendar", ["gregorian", "Gregorian", "GREGORIAN"])

0 commit comments

Comments
 (0)