diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index f5d1007dfbbbb..5dfac98d069e7 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -369,7 +369,7 @@ In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular frequency, we can use the :func:`date_range` and :func:`bdate_range` functions to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a -**calendar day** while the default for ``bdate_range`` is a **business day**: +**day** while the default for ``bdate_range`` is a **business day**: .. ipython:: python @@ -886,6 +886,27 @@ normalized after the function is applied. hour.apply(pd.Timestamp('2014-01-01 23:00')) +.. _timeseries.dayvscalendarday: + +Day vs. CalendarDay +~~~~~~~~~~~~~~~~~~~ + +:class:`Day` (``'D'``) is a timedelta-like offset that respects absolute time +arithmetic and is an alias for 24 :class:`Hour`. This offset is the default +argument to many pandas time-related functions like :func:`date_range` and :func:`timedelta_range`. + +:class:`CalendarDay` (``'CD'``) is a relativedelta-like offset that respects +calendar time arithmetic. :class:`CalendarDay` is useful for preserving calendar day +semantics with datetimes which have daylight savings transitions, i.e. :class:`CalendarDay` +will preserve the hour before the daylight savings transition. + +.. ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + ts + pd.offsets.Day(1) + ts + pd.offsets.CalendarDay(1) + + Parametric Offsets ~~~~~~~~~~~~~~~~~~ @@ -1176,7 +1197,8 @@ frequencies. We will refer to these aliases as *offset aliases*. 
"B", "business day frequency" "C", "custom business day frequency" - "D", "calendar day frequency" + "D", "day frequency" + "CD", "calendar day frequency" "W", "weekly frequency" "M", "month end frequency" "SM", "semi-month end frequency (15th and end of month)" diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1979bde796452..4df951ca2c3aa 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -285,6 +285,46 @@ that the dates have been converted to UTC .. ipython:: python pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) +.. _whatsnew_0240.api_breaking.calendarday: + +CalendarDay Offset +^^^^^^^^^^^^^^^^^^ + +:class:`Day` and associated frequency alias ``'D'`` were documented to represent +a calendar day; however, arithmetic and operations with :class:`Day` sometimes +respected absolute time instead (i.e. ``Day(n)`` acted identically to ``Timedelta(days=n)``). + +*Previous Behavior*: + +.. code-block:: ipython + + + In [2]: ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + + # Respects calendar arithmetic + In [3]: pd.date_range(start=ts, freq='D', periods=3) + Out[3]: + DatetimeIndex(['2016-10-30 00:00:00+03:00', '2016-10-31 00:00:00+02:00', + '2016-11-01 00:00:00+02:00'], + dtype='datetime64[ns, Europe/Helsinki]', freq='D') + + # Respects absolute arithmetic + In [4]: ts + pd.tseries.frequencies.to_offset('D') + Out[4]: Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki') + +:class:`CalendarDay` and associated frequency alias ``'CD'`` are now available +and respect calendar day arithmetic while :class:`Day` and frequency alias ``'D'`` +will now respect absolute time (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`). +See the :ref:`documentation here <timeseries.dayvscalendarday>` for more information. + +Addition with :class:`CalendarDay` across a daylight savings time transition: + +.. 
ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + ts + pd.offsets.Day(1) + ts + pd.offsets.CalendarDay(1) + .. _whatsnew_0240.api_breaking.period_end_time: Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 484eb430c82b1..466cfb296094c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -32,7 +32,7 @@ from pandas.core import ops from pandas.tseries.frequencies import to_offset -from pandas.tseries.offsets import Tick, Day, generate_range +from pandas.tseries.offsets import Tick, generate_range from pandas.core.arrays import datetimelike as dtl @@ -239,56 +239,33 @@ def _generate_range(cls, start, end, periods, freq, tz=None, start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) - tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz) - - if hasattr(freq, 'delta') and freq != Day(): - # sub-Day Tick - if inferred_tz is None and tz is not None: - # naive dates - if start is not None and start.tz is None: - start = start.tz_localize(tz, ambiguous=False) - - if end is not None and end.tz is None: - end = end.tz_localize(tz, ambiguous=False) - - if start and end: - if start.tz is None and end.tz is not None: - start = start.tz_localize(end.tz, ambiguous=False) - - if end.tz is None and start.tz is not None: - end = end.tz_localize(start.tz, ambiguous=False) - + tz, _ = _infer_tz_from_endpoints(start, end, tz) + + if tz is not None: + # Localize the start and end arguments + start = _maybe_localize_point( + start, getattr(start, 'tz', None), start, freq, tz + ) + end = _maybe_localize_point( + end, getattr(end, 'tz', None), end, freq, tz + ) + if start and end: + # Make sure start and end have the same tz + start = _maybe_localize_point( + start, start.tz, end.tz, freq, tz + ) + end = _maybe_localize_point( + end, end.tz, start.tz, freq, tz + ) + if freq is not None: if 
cls._use_cached_range(freq, _normalized, start, end): + # Currently always False; never hit + # Should be reimplemented as apart of GH 17914 index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) - else: - - if tz is not None: - # naive dates - if start is not None and start.tz is not None: - start = start.replace(tzinfo=None) - - if end is not None and end.tz is not None: - end = end.replace(tzinfo=None) - - if start and end: - if start.tz is None and end.tz is not None: - end = end.replace(tzinfo=None) - - if end.tz is None and start.tz is not None: - start = start.replace(tzinfo=None) - - if freq is not None: - if cls._use_cached_range(freq, _normalized, start, end): - index = cls._cached_range(start, end, periods=periods, - freq=freq) - else: - index = _generate_regular_range(cls, start, end, - periods, freq) - if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc( ensure_int64(index.values), @@ -302,12 +279,12 @@ def _generate_range(cls, start, end, periods, freq, tz=None, start = start.tz_localize(tz).asm8 if end is not None: end = end.tz_localize(tz).asm8 - else: - # Create a linearly spaced date_range in local time - start = start.tz_localize(tz) - end = end.tz_localize(tz) - arr = np.linspace(start.value, end.value, periods) - index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) + else: + # Create a linearly spaced date_range in local time + arr = np.linspace(start.value, end.value, periods) + index = cls._simple_new( + arr.astype('M8[ns]', copy=False), freq=None, tz=tz + ) if not left_closed and len(index) and index[0] == start: index = index[1:] @@ -1256,10 +1233,10 @@ def _generate_regular_range(cls, start, end, periods, freq): data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: tz = None + # start and end should have the same timezone by this point if isinstance(start, Timestamp): tz = start.tz - - if 
isinstance(end, Timestamp): + elif isinstance(end, Timestamp): tz = end.tz xdr = generate_range(start=start, end=end, @@ -1330,3 +1307,32 @@ def _maybe_normalize_endpoints(start, end, normalize): _normalized = _normalized and end.time() == _midnight return start, end, _normalized + + +def _maybe_localize_point(ts, is_none, is_not_none, freq, tz): + """ + Localize a start or end Timestamp to the timezone of the corresponding + start or end Timestamp + + Parameters + ---------- + ts : start or end Timestamp to potentially localize + is_none : argument that should be None + is_not_none : argument that should not be None + freq : Tick, DateOffset, or None + tz : str, timezone object or None + + Returns + ------- + ts : Timestamp + """ + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. generating a linspaced range + if isinstance(freq, Tick) or freq is None: + localize_args = {'tz': tz, 'ambiguous': False} + else: + localize_args = {'tz': None} + if is_none is None and is_not_none is not None: + ts = ts.tz_localize(**localize_args) + return ts diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 019aad4941d26..629660c899a3f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -385,7 +385,10 @@ def _generate_range(cls, start, end, periods, name=None, freq=None, @classmethod def _use_cached_range(cls, freq, _normalized, start, end): - return _use_cached_range(freq, _normalized, start, end) + # Note: This always returns False + return (freq._should_cache() and + not (freq._normalize_cache and not _normalized) and + _naive_in_cache_range(start, end)) def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ @@ -1580,7 +1583,7 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, Right bound for generating dates. periods : integer, optional Number of periods to generate. 
- freq : str or DateOffset, default 'D' (calendar daily) + freq : str or DateOffset, default 'D' Frequency strings can have multiples, e.g. '5H'. See :ref:`here ` for a list of frequency aliases. @@ -1861,17 +1864,7 @@ def _naive_in_cache_range(start, end): else: if start.tzinfo is not None or end.tzinfo is not None: return False - return _in_range(start, end, _CACHE_START, _CACHE_END) - - -def _in_range(start, end, rng_start, rng_end): - return start > rng_start and end < rng_end - - -def _use_cached_range(freq, _normalized, start, end): - return (freq._should_cache() and - not (freq._normalize_cache and not _normalized) and - _naive_in_cache_range(start, end)) + return start > _CACHE_START and end < _CACHE_END def _time_to_micros(time): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 364eea8fb8a3a..4b125580bd7e0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1052,7 +1052,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 - for numeric and 'D' (calendar daily) for datetime-like. + for numeric and 'D' for datetime-like. 
name : string, default None Name of the resulting IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 32aa89010b206..3a68c6c26a974 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -840,7 +840,7 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Right bound for generating periods periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' (calendar daily) + freq : string or DateOffset, default 'D' Frequency alias name : string, default None Name of the resulting PeriodIndex diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9f14d4cfd5863..063b578e512de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -737,7 +737,7 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, Right bound for generating timedeltas periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' (calendar daily) + freq : string or DateOffset, default 'D' Frequency strings can have multiples, e.g. 
'5H' name : string, default None Name of the resulting TimedeltaIndex diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 64b8f48f6a4e1..78b669de95598 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -259,88 +259,18 @@ def test_to_period_microsecond(self): assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') - def test_to_period_tz_pytz(self): - from pytz import utc as UTC - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=UTC) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_explicit_pytz(self): - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - + @pytest.mark.parametrize('tz', [ + 'US/Eastern', pytz.utc, tzlocal(), 'dateutil/US/Eastern', + dateutil.tz.tzutc()]) + def test_to_period_tz(self, tz): + ts = date_range('1/1/2000', '2/1/2000', tz=tz) result = 
ts.to_period()[0] expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_dateutil(self): - xp = date_range('1/1/2000', '4/1/2000').to_period() - ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + expected = date_range('1/1/2000', '2/1/2000').to_period() + result = ts.to_period() + tm.assert_index_equal(result, expected) def test_to_period_nofreq(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 22fb8b2942bea..e0caf671fc390 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -336,28 +336,28 @@ def test_range_tz_pytz(self): assert dr[0] == start assert dr[2] == end - def test_range_tz_dst_straddle_pytz(self): - tz = timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), - tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), - tz.localize(datetime(2013, 11, 6)))] - for (start, end) in dates: - dr = date_range(start, end, freq='D') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) - - dr = date_range(start, end, freq='D', tz='US/Eastern') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) - - dr = date_range(start.replace(tzinfo=None), end.replace( - 
tzinfo=None), freq='D', tz='US/Eastern') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) + @pytest.mark.parametrize('start, end', [ + [Timestamp(datetime(2014, 3, 6), tz='US/Eastern'), + Timestamp(datetime(2014, 3, 12), tz='US/Eastern')], + [Timestamp(datetime(2013, 11, 1), tz='US/Eastern'), + Timestamp(datetime(2013, 11, 6), tz='US/Eastern')] + ]) + def test_range_tz_dst_straddle_pytz(self, start, end): + dr = date_range(start, end, freq='CD') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start, end, freq='CD', tz='US/Eastern') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start.replace(tzinfo=None), end.replace( + tzinfo=None), freq='CD', tz='US/Eastern') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) def test_range_tz_dateutil(self): # see gh-2906 @@ -578,6 +578,14 @@ def test_mismatching_tz_raises_err(self, start, end): with pytest.raises(TypeError): pd.DatetimeIndex(start, end, freq=BDay()) + def test_CalendarDay_range_with_dst_crossing(self): + # GH 20596 + result = date_range('2018-10-23', '2018-11-06', freq='7CD', + tz='Europe/Paris') + expected = date_range('2018-10-23', '2018-11-06', + freq=pd.DateOffset(days=7), tz='Europe/Paris') + tm.assert_index_equal(result, expected) + class TestBusinessDateRange(object): @@ -772,7 +780,8 @@ def test_cdaterange_weekmask_and_holidays(self): holidays=['2013-05-01']) @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping - if freq.startswith('C')]) + if freq.startswith('C') + and freq != 'CD']) # CalendarDay def test_all_custom_freq(self, freq): # should not raise bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 95531b2d7a7ae..dc01f7ccbd496 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ 
b/pandas/tests/indexes/datetimes/test_timezones.py @@ -429,24 +429,24 @@ def test_dti_tz_localize_utc_conversion(self, tz): with pytest.raises(pytz.NonExistentTimeError): rng.tz_localize(tz) - def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): + @pytest.mark.parametrize('idx', [ + date_range(start='2014-01-01', end='2014-12-31', freq='M'), + date_range(start='2014-01-01', end='2014-12-31', freq='CD'), + date_range(start='2014-01-01', end='2014-03-01', freq='H'), + date_range(start='2014-08-01', end='2014-10-31', freq='T') + ]) + def test_dti_tz_localize_roundtrip(self, tz_aware_fixture, idx): tz = tz_aware_fixture + localized = idx.tz_localize(tz) + expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, + tz=tz) + tm.assert_index_equal(localized, expected) + with pytest.raises(TypeError): + localized.tz_localize(tz) - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - for idx in [idx1, idx2, idx3, idx4]: - localized = idx.tz_localize(tz) - expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, - tz=tz) - tm.assert_index_equal(localized, expected) - with pytest.raises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - tm.assert_index_equal(reset, idx) - assert reset.tzinfo is None + reset = localized.tz_localize(None) + tm.assert_index_equal(reset, idx) + assert reset.tzinfo is None def test_dti_tz_localize_naive(self): rng = date_range('1/1/2011', periods=100, freq='H') @@ -1033,7 +1033,9 @@ def test_date_range_span_dst_transition(self, tzstr): assert (dr.hour == 0).all() dr = date_range('2012-11-02', periods=10, tz=tzstr) - assert (dr.hour == 0).all() + result = dr.hour + expected = Index([0, 0, 0, 23, 23, 23, 23, 23, 23, 23]) + tm.assert_index_equal(result, expected) 
@pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) def test_date_range_timezone_str_argument(self, tzstr): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 87dff74cd04d7..1d10e63363cc8 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -48,6 +48,10 @@ def test_timedelta_range(self): result = df.loc['0s':, :] tm.assert_frame_equal(expected, result) + with pytest.raises(ValueError): + # GH 22274: CalendarDay is a relative time measurement + timedelta_range('1day', freq='CD', periods=2) + @pytest.mark.parametrize('periods, freq', [ (3, '2D'), (5, 'D'), (6, '19H12T'), (7, '16H'), (9, '12H')]) def test_linspace_behavior(self, periods, freq): diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index d59e7fd445f17..472b2c5644fa5 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -302,7 +302,7 @@ def test_getitem_pydatetime_tz(self, tzstr): def test_series_truncate_datetimeindex_tz(self): # GH 9243 - idx = date_range('4/1/2005', '4/30/2005', freq='D', tz='US/Pacific') + idx = date_range('4/1/2005', '4/30/2005', freq='CD', tz='US/Pacific') s = Series(range(len(idx)), index=idx) result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) expected = Series([1, 2, 3], index=idx[1:4]) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 530a683c02f9d..669fa9742a705 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2038,7 +2038,7 @@ def test_resample_dst_anchor(self): # 5172 dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') df = DataFrame([5], index=dti) - assert_frame_equal(df.resample(rule='D').sum(), + assert_frame_equal(df.resample(rule='CD').sum(), DataFrame([5], index=df.index.normalize())) df.resample(rule='MS').sum() 
assert_frame_equal( @@ -2092,14 +2092,14 @@ def test_resample_dst_anchor(self): df_daily = df['10/26/2013':'10/29/2013'] assert_frame_equal( - df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"}) + df_daily.resample("CD").agg({"a": "min", "b": "max", "c": "count"}) [["a", "b", "c"]], DataFrame({"a": [1248, 1296, 1346, 1394], "b": [1295, 1345, 1393, 1441], "c": [48, 50, 48, 48]}, index=date_range('10/26/2013', '10/29/2013', - freq='D', tz='Europe/Paris')), - 'D Frequency') + freq='CD', tz='Europe/Paris')), + 'CD Frequency') def test_downsample_across_dst(self): # GH 8531 diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index e95f1ba11ad5c..f9f5fc2484bda 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -2,6 +2,7 @@ from datetime import date, datetime, timedelta import pytest +import pytz from pandas.compat import range from pandas import compat @@ -16,6 +17,7 @@ from pandas.tseries.frequencies import _offset_map, get_offset from pandas.core.indexes.datetimes import ( _to_m8, DatetimeIndex, _daterange_cache) +from pandas.core.indexes.timedeltas import TimedeltaIndex import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, @@ -28,11 +30,12 @@ YearEnd, Day, QuarterEnd, BusinessMonthEnd, FY5253, Nano, Easter, FY5253Quarter, - LastWeekOfMonth, Tick) + LastWeekOfMonth, Tick, CalendarDay) import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle from pandas._libs.tslibs import timezones from pandas._libs.tslib import NaT, Timestamp +from pandas._libs.tslibs.timedeltas import Timedelta import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.tseries.holiday import USFederalHolidayCalendar @@ -192,6 +195,7 @@ class TestCommon(Base): # are applied to 2011/01/01 09:00 (Saturday) # 
used for .apply and .rollforward expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), + 'CalendarDay': Timestamp('2011-01-02 09:00:00'), 'DateOffset': Timestamp('2011-01-02 09:00:00'), 'BusinessDay': Timestamp('2011-01-03 09:00:00'), 'CustomBusinessDay': Timestamp('2011-01-03 09:00:00'), @@ -360,7 +364,7 @@ def test_rollforward(self, offset_types): # result will not be changed if the target is on the offset no_changes = ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', - 'Nano', 'DateOffset'] + 'Nano', 'DateOffset', 'CalendarDay'] for n in no_changes: expecteds[n] = Timestamp('2011/01/01 09:00') @@ -373,6 +377,7 @@ def test_rollforward(self, offset_types): norm_expected[k] = Timestamp(norm_expected[k].date()) normalized = {'Day': Timestamp('2011-01-02 00:00:00'), + 'CalendarDay': Timestamp('2011-01-02 00:00:00'), 'DateOffset': Timestamp('2011-01-02 00:00:00'), 'MonthBegin': Timestamp('2011-02-01 00:00:00'), 'SemiMonthBegin': Timestamp('2011-01-15 00:00:00'), @@ -425,7 +430,7 @@ def test_rollback(self, offset_types): # result will not be changed if the target is on the offset for n in ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', - 'DateOffset']: + 'DateOffset', 'CalendarDay']: expecteds[n] = Timestamp('2011/01/01 09:00') # but be changed when normalize=True @@ -434,6 +439,7 @@ def test_rollback(self, offset_types): norm_expected[k] = Timestamp(norm_expected[k].date()) normalized = {'Day': Timestamp('2010-12-31 00:00:00'), + 'CalendarDay': Timestamp('2010-12-31 00:00:00'), 'DateOffset': Timestamp('2010-12-31 00:00:00'), 'MonthBegin': Timestamp('2010-12-01 00:00:00'), 'SemiMonthBegin': Timestamp('2010-12-15 00:00:00'), @@ -3174,3 +3180,71 @@ def test_last_week_of_month_on_offset(): slow = (ts + offset) - offset == ts fast = offset.onOffset(ts) assert fast == slow + + +class TestCalendarDay(object): + + def test_add_across_dst_scalar(self): + 
# GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + result = ts + CalendarDay(1) + assert result == expected + + result = result - CalendarDay(1) + assert result == ts + + @pytest.mark.parametrize('box', [DatetimeIndex, Series]) + def test_add_across_dst_array(self, box): + # GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + arr = box([ts]) + expected = box([expected]) + result = arr + CalendarDay(1) + tm.assert_equal(result, expected) + + result = result - CalendarDay(1) + tm.assert_equal(arr, result) + + @pytest.mark.parametrize('arg', [ + Timestamp("2018-11-03 01:00:00", tz='US/Pacific'), + DatetimeIndex([Timestamp("2018-11-03 01:00:00", tz='US/Pacific')]) + ]) + def test_raises_AmbiguousTimeError(self, arg): + # GH 22274 + with pytest.raises(pytz.AmbiguousTimeError): + arg + CalendarDay(1) + + @pytest.mark.parametrize('arg', [ + Timestamp("2019-03-09 02:00:00", tz='US/Pacific'), + DatetimeIndex([Timestamp("2019-03-09 02:00:00", tz='US/Pacific')]) + ]) + def test_raises_NonExistentTimeError(self, arg): + # GH 22274 + with pytest.raises(pytz.NonExistentTimeError): + arg + CalendarDay(1) + + @pytest.mark.parametrize('arg, exp', [ + [1, 2], + [-1, 0], + [-5, -4] + ]) + def test_arithmetic(self, arg, exp): + # GH 22274 + result = CalendarDay(1) + CalendarDay(arg) + expected = CalendarDay(exp) + assert result == expected + + @pytest.mark.parametrize('arg', [ + timedelta(1), + Day(1), + Timedelta(1), + TimedeltaIndex([timedelta(1)]) + ]) + def test_invalid_arithmetic(self, arg): + # GH 22274 + # CalendarDay (relative time) cannot be added to Timedelta-like objects + # (absolute time) + with pytest.raises(TypeError): + CalendarDay(1) + arg diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 
914d61a18ee11..369c0971f1e9a 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -10,7 +10,8 @@ from pandas import Timedelta, Timestamp from pandas.tseries import offsets -from pandas.tseries.offsets import Hour, Minute, Second, Milli, Micro, Nano +from pandas.tseries.offsets import (Day, Hour, Minute, Second, Milli, Micro, + Nano) from .common import assert_offset_equal @@ -211,6 +212,13 @@ def test_Nanosecond(): assert Micro(5) + Nano(1) == Nano(5001) +def test_Day_equals_24_Hours(): + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + result = ts + Day(1) + expected = ts + Hour(24) + assert result == expected + + @pytest.mark.parametrize('kls, expected', [(Hour, Timedelta(hours=5)), (Minute, Timedelta(hours=2, minutes=3)), diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index c74b7454a67e3..d4a8211c17b87 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -41,7 +41,7 @@ 'LastWeekOfMonth', 'FY5253Quarter', 'FY5253', 'Week', 'WeekOfMonth', 'Easter', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', - 'DateOffset'] + 'DateOffset', 'CalendarDay'] # convert to/from datetime/timestamp to allow invalid Timestamp ranges to # pass thru @@ -2123,6 +2123,54 @@ def onOffset(self, dt): return False return date(dt.year, dt.month, dt.day) == easter(dt.year) + +class CalendarDay(SingleConstructorOffset): + """ + Calendar day offset. Respects calendar arithmetic as opposed to Day which + respects absolute time. + """ + _adjust_dst = True + _inc = Timedelta(days=1) + _prefix = 'CD' + _attributes = frozenset(['n', 'normalize']) + + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n, normalize) + + @apply_wraps + def apply(self, other): + """ + Apply scalar arithmetic with CalendarDay offset. Incoming datetime + objects can be tz-aware or naive. 
+ """ + if type(other) == type(self): + # Add other CalendarDays + return type(self)(self.n + other.n, normalize=self.normalize) + tzinfo = getattr(other, 'tzinfo', None) + if tzinfo is not None: + other = other.replace(tzinfo=None) + + other = other + self.n * self._inc + + if tzinfo is not None: + # This can raise a AmbiguousTimeError or NonExistentTimeError + other = conversion.localize_pydatetime(other, tzinfo) + + try: + return as_timestamp(other) + except TypeError: + raise TypeError("Cannot perform arithmetic between {other} and " + "CalendarDay".format(other=type(other))) + + @apply_index_wraps + def apply_index(self, i): + """ + Apply the CalendarDay offset to a DatetimeIndex. Incoming DatetimeIndex + objects are assumed to be tz_naive + """ + return i + self.n * self._inc + + # --------------------------------------------------------------------- # Ticks @@ -2310,7 +2358,8 @@ def generate_range(start=None, end=None, periods=None, ---------- start : datetime (default None) end : datetime (default None) - periods : int, optional + periods : int, (default None) + offset : DateOffset, (default BDay()) time_rule : (legacy) name of DateOffset object to be used, optional Corresponds with names expected by tseries.frequencies.get_offset @@ -2406,4 +2455,5 @@ def generate_range(start=None, end=None, periods=None, WeekOfMonth, # 'WOM' FY5253, FY5253Quarter, + CalendarDay # 'CD' ]}