Skip to content

Disallow standard calendar np.datetime64 encoding prior to reform #10352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~


Deprecations
~~~~~~~~~~~~

Expand All @@ -51,6 +50,14 @@ Bug fixes
calculating mean in rolling for correct operations (preserve float dtypes,
correct mean of bool arrays) (:issue:`10340`, :pull:`10341`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Raise an error when attempting to encode :py:class:`numpy.datetime64` values
prior to the Gregorian calendar reform date of 1582-10-15 with a
``"standard"`` or ``"gregorian"`` calendar. Previously we would warn and
encode these as :py:class:`cftime.DatetimeGregorian` objects, but it is not
clear that this is the user's intent, since this implicitly converts the
calendar of the datetimes from ``"proleptic_gregorian"`` to ``"gregorian"``
and prevents round-tripping them as :py:class:`numpy.datetime64` values
(:pull:`10352`). By `Spencer Clark <https://github.com/spencerkclark>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
22 changes: 12 additions & 10 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,7 @@ def _eagerly_encode_cf_datetime(
calendar = infer_calendar_name(dates)

raise_incompatible_units_error = False
raise_gregorian_proleptic_gregorian_mismatch_error = False
try:
if not _is_standard_calendar(calendar) or dates.dtype.kind == "O":
# parse with cftime instead
Expand All @@ -1073,16 +1074,7 @@ def _eagerly_encode_cf_datetime(
if calendar in ["standard", "gregorian"] and np.nanmin(dates).astype(
"=M8[us]"
).astype(datetime) < datetime(1582, 10, 15):
# if we use standard calendar and for dates before the reform
# we need to use cftime instead
emit_user_level_warning(
f"Unable to encode numpy.datetime64 objects with {calendar} calendar."
"Using cftime.datetime objects instead, reason: dates prior "
"reform date (1582-10-15). To silence this warning transform "
"numpy.datetime64 to corresponding cftime.datetime beforehand.",
SerializationWarning,
)
raise OutOfBoundsDatetime
raise_gregorian_proleptic_gregorian_mismatch_error = True

time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
# calendar equivalence only for days after the reform
Expand Down Expand Up @@ -1166,6 +1158,16 @@ def _eagerly_encode_cf_datetime(
f"units {units!r}. Consider setting encoding['units'] to {new_units!r} to "
f"serialize with an integer dtype."
)
if raise_gregorian_proleptic_gregorian_mismatch_error:
raise ValueError(
f"Unable to encode np.datetime64 values with {calendar} "
f"calendar, because some or all values are prior to the reform "
f"date of 1582-10-15. To encode these times, set "
f"encoding['calendar'] to 'proleptic_gregorian' instead, which "
f"is the true calendar that np.datetime64 values use. The "
f"'standard' or 'gregorian' calendar is only equivalent to the "
f"'proleptic_gregorian' calendar after the reform date."
)

return num, units, calendar

Expand Down
37 changes: 16 additions & 21 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,6 @@ def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None:
def test_decode_dates_outside_timestamp_range(
calendar, time_unit: PDDatetimeUnitOptions
) -> None:
from datetime import datetime

import cftime

units = "days since 0001-01-01"
Expand Down Expand Up @@ -379,8 +377,6 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
def test_decode_multidim_time_outside_timestamp_range(
calendar, time_unit: PDDatetimeUnitOptions
) -> None:
from datetime import datetime

import cftime

units = "days since 0001-01-01"
Expand Down Expand Up @@ -1163,27 +1159,26 @@ def test__encode_datetime_with_cftime() -> None:


@requires_cftime
def test_encode_decode_cf_datetime_outofbounds_warnings(
def test_round_trip_standard_calendar_cftime_datetimes_pre_reform() -> None:
from cftime import DatetimeGregorian

dates = np.array([DatetimeGregorian(1, 1, 1), DatetimeGregorian(2000, 1, 1)])
encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard")
with pytest.warns(SerializationWarning, match="Unable to decode time axis"):
decoded = decode_cf_datetime(*encoded)
np.testing.assert_equal(decoded, dates)


@pytest.mark.parametrize("calendar", ["standard", "gregorian"])
def test_encode_cf_datetime_gregorian_proleptic_gregorian_mismatch_error(
calendar: str,
time_unit: PDDatetimeUnitOptions,
) -> None:
import cftime

if time_unit == "ns":
pytest.skip("does not work work out of bounds datetimes")
pytest.skip("datetime64[ns] values can only be defined post reform")
dates = np.array(["0001-01-01", "2001-01-01"], dtype=f"datetime64[{time_unit}]")
cfdates = np.array(
[
cftime.datetime(t0.year, t0.month, t0.day, calendar="gregorian")
for t0 in dates.astype(datetime)
]
)
with pytest.warns(
SerializationWarning, match="Unable to encode numpy.datetime64 objects"
):
encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard")
with pytest.warns(SerializationWarning, match="Unable to decode time axis"):
decoded = decode_cf_datetime(*encoded)
np.testing.assert_equal(decoded, cfdates)
with pytest.raises(ValueError, match="proleptic_gregorian"):
encode_cf_datetime(dates, "seconds since 2000-01-01", calendar)


@pytest.mark.parametrize("calendar", ["gregorian", "Gregorian", "GREGORIAN"])
Expand Down
Loading