diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3e22084d98234..ade395b15cd47 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -615,6 +615,7 @@ Timezones - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DataFrame.resample` when :class:`DatetimeIndex` starts or ends on a DST transition (:issue:`10117`, :issue:`19375`) Offsets ^^^^^^^ diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..966801a400c37 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -893,34 +893,39 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) result_a = np.empty(n, dtype=np.int64) result_b = np.empty(n, dtype=np.int64) result_a.fill(NPY_NAT) result_b.fill(NPY_NAT) - # left side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_left = (np.maximum(0, trans.searchsorted( vals - DAY_NS, side='right') - 1)).astype(np.int64) - for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the left side of the DST transition - if v + deltas[pos] == vals[i]: - result_a[i] = v - - # right side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_right = (np.maximum(0, trans.searchsorted( vals + DAY_NS, side='right') - 1)).astype(np.int64) for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the right side of the DST transition - if v + deltas[pos] == vals[i]: - result_b[i] = v + v_left = vals[i] - deltas[idx_shifted_left[i]] + if v_left in trans: + # The vals[i] lies directly on the DST border. + result_a[i] = v_left + else: + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == vals[i]: + result_a[i] = v_left + + v_right = vals[i] - deltas[idx_shifted_right[i]] + if v_right in trans: + # The vals[i] lies directly on the DST border. + result_b[i] = v_right + else: + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 + # timestamp falls to the right side of the DST transition + if v_right + deltas[pos_right] == vals[i]: + result_b[i] = v_right if infer_dst: dst_hours = np.empty(n, dtype=np.int64) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 38801832829b0..73b48fbf511c3 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2125,6 +2125,26 @@ def test_downsample_across_dst(self): freq='H')) tm.assert_series_equal(result, expected) + def test_bin_edges_on_DST_transition(self): + # GH 10117 + # Ends on DST boundary + idx = date_range("2014-10-26 00:30:00", "2014-10-26 02:30:00", + freq="30T", tz="Europe/London") + expected = Series(range(len(idx)), index=idx) + result = expected.resample('30T').mean() + tm.assert_series_equal(result, expected) + + # Starts on DST boundary + idx = date_range('2014-03-09 03:00', periods=4, + freq='H', tz='America/Chicago') + s = Series(range(len(idx)), index=idx) + result = s.resample('H', label='right', closed='right').sum() + expected = Series([1, 2, 3], index=date_range('2014-03-09 04:00', + periods=3, + freq='H', + tz='America/Chicago')) + tm.assert_series_equal(result, expected) + def test_resample_with_nat(self): # GH 13020 index = DatetimeIndex([pd.NaT,