-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
# https://www.timeanddate.com/time/change/belgium/brussels?year=2022
ts = pd.Timestamp("2022-10-30", tz="Europe/Brussels")
offset_explicit_keyword = ts + pd.offsets.DateOffset(days=1)
offset_default_value = ts + pd.offsets.DateOffset(1)
assert offset_explicit_keyword == offset_default_value
Issue Description
According to the documentation, pd.offsets.DateOffset(1) should be the same as pd.offsets.DateOffset(days=1).
During instantiation, the following function is called:
pandas/pandas/_libs/tslibs/offsets.pyx
Lines 284 to 334 in bc6ad14
cdef _determine_offset(kwds): | |
if not kwds: | |
# GH 45643/45890: (historically) defaults to 1 day | |
return timedelta(days=1), False | |
if "millisecond" in kwds: | |
raise NotImplementedError( | |
"Using DateOffset to replace `millisecond` component in " | |
"datetime object is not supported. Use " | |
"`microsecond=timestamp.microsecond % 1000 + ms * 1000` " | |
"instead." | |
) | |
nanos = {"nanosecond", "nanoseconds"} | |
# nanos are handled by apply_wraps | |
if all(k in nanos for k in kwds): | |
return timedelta(days=0), False | |
kwds_no_nanos = {k: v for k, v in kwds.items() if k not in nanos} | |
kwds_use_relativedelta = { | |
"year", "month", "day", "hour", "minute", | |
"second", "microsecond", "weekday", "years", "months", "weeks", "days", | |
"hours", "minutes", "seconds", "microseconds" | |
} | |
# "weeks" and "days" are left out despite being valid args for timedelta, | |
# because (historically) timedelta is used only for sub-daily. | |
kwds_use_timedelta = { | |
"seconds", "microseconds", "milliseconds", "minutes", "hours", | |
} | |
if all(k in kwds_use_timedelta for k in kwds_no_nanos): | |
# Sub-daily offset - use timedelta (tz-aware) | |
# This also handles "milliseconds" (plur): see GH 49897 | |
return timedelta(**kwds_no_nanos), False | |
# convert milliseconds to microseconds, so relativedelta can parse it | |
if "milliseconds" in kwds_no_nanos: | |
micro = kwds_no_nanos.pop("milliseconds") * 1000 | |
kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro | |
if all(k in kwds_use_relativedelta for k in kwds_no_nanos): | |
from dateutil.relativedelta import relativedelta | |
return relativedelta(**kwds_no_nanos), True | |
raise ValueError( | |
f"Invalid argument/s or bad combination of arguments: {list(kwds.keys())}" | |
) |
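For pd.offsets.DateOffset(1), it returns `timedelta(days=1), False`, whereas for pd.offsets.DateOffset(days=1) it returns `relativedelta(days=1), True`. This makes the two offsets behave inconsistently near DST transitions: on 2022-10-30 the local day in Europe/Brussels is 25 hours long, so adding a fixed 24-hour timedelta and advancing by one calendar day give different results.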
Expected Behavior
Either the documentation should be changed, or we should ensure that pd.offsets.DateOffset(1) equals pd.offsets.DateOffset(days=1).
As I understand it, this could be done as follows:
cdef _determine_offset(kwds):
if not kwds:
+ from dateutil.relativedelta import relativedelta
+
# GH 45643/45890: (historically) defaults to 1 day
- return timedelta(days=1), False
+ return relativedelta(days=1), True
Installed Versions
INSTALLED VERSIONS
commit : c888af6
python : 3.12.10
python-bits : 64
OS : Darwin
OS-release : 24.5.0
Version : Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:25 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T6020
machine : arm64
processor : arm
byteorder : little
LC_ALL : None
LANG : None
LOCALE : en_US.UTF-8
pandas : 2.3.1
numpy : 2.3.1
pytz : 2025.2
dateutil : 2.9.0.post0
pip : None
Cython : None
sphinx : None
IPython : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : None
blosc : None
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : None
html5lib : None
hypothesis : None
gcsfs : None
jinja2 : None
lxml.etree : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
psycopg2 : None
pymysql : None
pyarrow : None
pyreadstat : None
pytest : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlsxwriter : None
zstandard : None
tzdata : 2025.2
qtpy : None
pyqt5 : None