From 32aa2f7d1769e58c99086c201993ad61358b38ca Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 25 Aug 2018 18:22:00 -0700 Subject: [PATCH 1/4] BUG: Resampling when the edges lie on DST transition --- pandas/_libs/tslibs/conversion.pyx | 39 ++++++++++++++++-------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..02fe2461845c3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -898,29 +898,32 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result_a.fill(NPY_NAT) result_b.fill(NPY_NAT) - # left side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_left = (np.maximum(0, trans.searchsorted( vals - DAY_NS, side='right') - 1)).astype(np.int64) - for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the left side of the DST transition - if v + deltas[pos] == vals[i]: - result_a[i] = v - - # right side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_right = (np.maximum(0, trans.searchsorted( vals + DAY_NS, side='right') - 1)).astype(np.int64) for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the right side of the DST transition - if v + deltas[pos] == vals[i]: - result_b[i] = v + v_left = vals[i] - deltas[idx_shifted_left[i]] + if v_left in trans: + # The vals[i] lies directly on the DST border. + result_a[i] = v_left + else: + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == vals[i]: + result_a[i] = v_left + + v_right = vals[i] - deltas[idx_shifted_right[i]] + if v_right in trans: + # The vals[i] lies directly on the DST border. + result_b[i] = v_right + else: + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 + # timestamp falls to the right side of the DST transition + if v_right + deltas[pos_right] == vals[i]: + result_b[i] = v_right if infer_dst: dst_hours = np.empty(n, dtype=np.int64) From 7fbd6cc143a975639cdb1994a8208408b1868cb5 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 26 Aug 2018 10:47:16 -0700 Subject: [PATCH 2/4] Add more comments --- pandas/_libs/tslibs/conversion.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 02fe2461845c3..966801a400c37 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -893,6 +893,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) result_a = np.empty(n, dtype=np.int64) result_b = np.empty(n, dtype=np.int64) result_a.fill(NPY_NAT) From 17e7f43ec3fa20d41e8ed30904add038943005b4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 26 Aug 2018 11:12:30 -0700 Subject: [PATCH 3/4] add tests and whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/tests/test_resample.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3e22084d98234..ade395b15cd47 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -615,6 +615,7 @@ Timezones - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DataFrame.resample` when :class:`DatetimeIndex` starts or ends on a DST transition (:issue:`10117`, :issue:`19375`) Offsets ^^^^^^^ diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 38801832829b0..1cf5972b17cc9 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2125,6 +2125,22 @@ def test_downsample_across_dst(self): freq='H')) tm.assert_series_equal(result, expected) + def test_bin_edges_on_DST_transition(self): + # GH 10117 + # Ends on DST boundary + idx = date_range("2014-10-26 00:30:00", "2014-10-26 02:30:00", + freq="30T", tz="Europe/London") + expected = Series(range(len(idx)), index=idx) + result = expected.resample('30T').mean() + tm.assert_series_equal(result, expected) + + # Starts on DST boundary + idx = date_range('2014-03-09 03:00', '2015-03-09 03:00', + freq='H', tz='America/Chicago') + expected = Series(range(len(idx)), index=idx) + result = expected.resample('H').mean() + tm.assert_series_equal(result, expected) + def test_resample_with_nat(self): # GH 13020 index = DatetimeIndex([pd.NaT, From c070dffe192644f3e6bd36458fff1119a661229a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 26 Aug 2018 11:25:26 -0700 Subject: [PATCH 4/4] Adjust test --- pandas/tests/test_resample.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 1cf5972b17cc9..73b48fbf511c3 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2135,10 +2135,14 @@ def test_bin_edges_on_DST_transition(self): tm.assert_series_equal(result, expected) # Starts on DST boundary - idx = date_range('2014-03-09 03:00', '2015-03-09 03:00', + idx = date_range('2014-03-09 03:00', periods=4, freq='H', tz='America/Chicago') - expected = Series(range(len(idx)), index=idx) - result = expected.resample('H').mean() + s = Series(range(len(idx)), index=idx) + result = s.resample('H', label='right', closed='right').sum() + expected = Series([1, 2, 3], index=date_range('2014-03-09 04:00', + periods=3, + freq='H', + tz='America/Chicago')) tm.assert_series_equal(result, expected) def test_resample_with_nat(self):