From dca549e3d2abc6c6ba7f45cc4ad786b2c67a8310 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 24 May 2025 09:12:11 -0400 Subject: [PATCH] Disallow standard calendar datetime64 encoding prior to reform --- doc/whats-new.rst | 9 +++++++- xarray/coding/times.py | 22 +++++++++--------- xarray/tests/test_coding_times.py | 37 +++++++++++++------------------ 3 files changed, 36 insertions(+), 32 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a581b22f5b3..a936b2825a1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,6 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ - Deprecations ~~~~~~~~~~~~ @@ -51,6 +50,14 @@ Bug fixes calculating mean in rolling for correct operations (preserve float dtypes, correct mean of bool arrays) (:issue:`10340`, :pull:`10341`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. +- Raise an error when attempting to encode :py:class:`numpy.datetime64` values + prior to the Gregorian calendar reform date of 1582-10-15 with a + ``"standard"`` or ``"gregorian"`` calendar. Previously we would warn and + encode these as :py:class:`cftime.DatetimeGregorian` objects, but it is not + clear that this is the user's intent, since this implicitly converts the + calendar of the datetimes from ``"proleptic_gregorian"`` to ``"gregorian"`` + and prevents round-tripping them as :py:class:`numpy.datetime64` values + (:pull:`10352`). By `Spencer Clark <https://github.com/spencerkclark>`_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 55cb17169d7..bec374e4fd9 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1065,6 +1065,7 @@ def _eagerly_encode_cf_datetime( calendar = infer_calendar_name(dates) raise_incompatible_units_error = False + raise_gregorian_proleptic_gregorian_mismatch_error = False try: if not _is_standard_calendar(calendar) or dates.dtype.kind == "O": # parse with cftime instead @@ -1073,16 +1074,7 @@ def _eagerly_encode_cf_datetime( if calendar in ["standard", "gregorian"] and np.nanmin(dates).astype( "=M8[us]" ).astype(datetime) < datetime(1582, 10, 15): - # if we use standard calendar and for dates before the reform - # we need to use cftime instead - emit_user_level_warning( - f"Unable to encode numpy.datetime64 objects with {calendar} calendar." - "Using cftime.datetime objects instead, reason: dates prior " - "reform date (1582-10-15). To silence this warning transform " - "numpy.datetime64 to corresponding cftime.datetime beforehand.", - SerializationWarning, - ) - raise OutOfBoundsDatetime + raise_gregorian_proleptic_gregorian_mismatch_error = True time_unit, ref_date = _unpack_time_unit_and_ref_date(units) # calendar equivalence only for days after the reform @@ -1166,6 +1158,16 @@ def _eagerly_encode_cf_datetime( f"units {units!r}. Consider setting encoding['units'] to {new_units!r} to " f"serialize with an integer dtype." ) + if raise_gregorian_proleptic_gregorian_mismatch_error: + raise ValueError( + f"Unable to encode np.datetime64 values with {calendar} " + f"calendar, because some or all values are prior to the reform " + f"date of 1582-10-15. To encode these times, set " + f"encoding['calendar'] to 'proleptic_gregorian' instead, which " + f"is the true calendar that np.datetime64 values use. The " + f"'standard' or 'gregorian' calendar is only equivalent to the " + f"'proleptic_gregorian' calendar after the reform date." 
+ ) return num, units, calendar diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 3b85d395fdb..22cf0edfddd 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -239,8 +239,6 @@ def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None: def test_decode_dates_outside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions ) -> None: - from datetime import datetime - import cftime units = "days since 0001-01-01" @@ -379,8 +377,6 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( def test_decode_multidim_time_outside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions ) -> None: - from datetime import datetime - import cftime units = "days since 0001-01-01" @@ -1163,27 +1159,26 @@ def test__encode_datetime_with_cftime() -> None: @requires_cftime -def test_encode_decode_cf_datetime_outofbounds_warnings( +def test_round_trip_standard_calendar_cftime_datetimes_pre_reform() -> None: + from cftime import DatetimeGregorian + + dates = np.array([DatetimeGregorian(1, 1, 1), DatetimeGregorian(2000, 1, 1)]) + encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard") + with pytest.warns(SerializationWarning, match="Unable to decode time axis"): + decoded = decode_cf_datetime(*encoded) + np.testing.assert_equal(decoded, dates) + + +@pytest.mark.parametrize("calendar", ["standard", "gregorian"]) +def test_encode_cf_datetime_gregorian_proleptic_gregorian_mismatch_error( + calendar: str, time_unit: PDDatetimeUnitOptions, ) -> None: - import cftime - if time_unit == "ns": - pytest.skip("does not work work out of bounds datetimes") + pytest.skip("datetime64[ns] values can only be defined post reform") dates = np.array(["0001-01-01", "2001-01-01"], dtype=f"datetime64[{time_unit}]") - cfdates = np.array( - [ - cftime.datetime(t0.year, t0.month, t0.day, calendar="gregorian") - for t0 in dates.astype(datetime) - ] - ) - with pytest.warns( 
- SerializationWarning, match="Unable to encode numpy.datetime64 objects" - ): - encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard") - with pytest.warns(SerializationWarning, match="Unable to decode time axis"): - decoded = decode_cf_datetime(*encoded) - np.testing.assert_equal(decoded, cfdates) + with pytest.raises(ValueError, match="proleptic_gregorian"): + encode_cf_datetime(dates, "seconds since 2000-01-01", calendar) @pytest.mark.parametrize("calendar", ["gregorian", "Gregorian", "GREGORIAN"])