From f0c125da0751ae14e14b58f69a2efa49cb0d02e8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 4 Aug 2018 23:37:36 -0700 Subject: [PATCH] CLN: Old timezone issues --- doc/source/whatsnew/v0.24.0.txt | 4 ++ .../indexes/datetimes/test_construction.py | 18 +++++++ .../tests/indexes/datetimes/test_datetime.py | 3 ++ pandas/tests/indexing/test_datetime.py | 47 +++++++++++++++++++ 4 files changed, 72 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5c15c7b6a742f..f419e2d06239c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -561,6 +561,10 @@ Timezones - Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) - Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) - Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) +- Bug when constructing a :class:`DatetimeIndex` with :class:`Timestamp` objects constructed with the ``replace`` method across DST (:issue:`18785`) +- Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) +- Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) +- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) Offsets ^^^^^^^ diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 5653943c37e37..b6f27cbdd1b89 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -503,6 +503,24 @@ def test_construction_int_rountrip(self, tz_naive_fixture): expected = DatetimeIndex([1293858000000000000], tz=tz).asi8[0] assert result == expected + def 
test_construction_from_replaced_timestamps_with_dst(self): + # GH 18785 + index = pd.date_range(pd.Timestamp(2000, 1, 1), + pd.Timestamp(2005, 1, 1), + freq='MS', tz='Australia/Melbourne') + test = pd.DataFrame({'data': range(len(index))}, index=index) + test = test.resample('Y').mean() + result = pd.DatetimeIndex([x.replace(month=6, day=1) + for x in test.index]) + expected = pd.DatetimeIndex(['2000-06-01 00:00:00', + '2001-06-01 00:00:00', + '2002-06-01 00:00:00', + '2003-06-01 00:00:00', + '2004-06-01 00:00:00', + '2005-06-01 00:00:00'], + tz='Australia/Melbourne') + tm.assert_index_equal(result, expected) + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 2adf09924a509..db3de0ceced0c 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -394,3 +394,6 @@ def test_factorize_dst(self): def test_unique(self, arr, expected): result = arr.unique() tm.assert_index_equal(result, expected) + # GH 21737 + # Ensure the underlying data is consistent + assert result[0] == expected[0] diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 751372380d262..df59390475da8 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,5 +1,9 @@ +from datetime import datetime, timedelta + import numpy as np import pandas as pd +from dateutil import tz + from pandas import date_range, Index, DataFrame, Series, Timestamp from pandas.util import testing as tm @@ -266,3 +270,46 @@ def test_nanosecond_getitem_setitem_with_tz(self): result.loc[df.index[0], 'a'] = -1 expected = DataFrame(-1, index=index, columns=['a']) tm.assert_frame_equal(result, expected) + + def test_loc_getitem_across_dst(self): + # GH 21846 + idx = pd.date_range('2017-10-29 01:30:00', + tz='Europe/Berlin', periods=5, freq='30 min') + series2 = pd.Series([0, 1, 2, 3, 4], + 
index=idx) + + t_1 = pd.Timestamp('2017-10-29 02:30:00+02:00', tz='Europe/Berlin', + freq='30min') + t_2 = pd.Timestamp('2017-10-29 02:00:00+01:00', tz='Europe/Berlin', + freq='30min') + result = series2.loc[t_1:t_2] + expected = pd.Series([2, 3], index=idx[2:4]) + tm.assert_series_equal(result, expected) + + result = series2[t_1] + expected = 2 + assert result == expected + + def test_loc_incremental_setitem_with_dst(self): + # GH 20724 + base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific")) + idxs = [base + timedelta(seconds=i * 900) for i in range(16)] + result = pd.Series([0], index=[idxs[0]]) + for ts in idxs: + result.loc[ts] = 1 + expected = pd.Series(1, index=idxs) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_with_existing_dst(self): + # GH 18308 + start = pd.Timestamp('2017-10-29 00:00:00+0200', tz='Europe/Madrid') + end = pd.Timestamp('2017-10-29 03:00:00+0100', tz='Europe/Madrid') + ts = pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid') + idx = pd.date_range(start, end, closed='left', freq="H") + result = pd.DataFrame(index=idx, columns=['value']) + result.loc[ts, 'value'] = 12 + expected = pd.DataFrame([np.nan] * len(idx) + [12], + index=idx.append(pd.DatetimeIndex([ts])), + columns=['value'], + dtype=object) + tm.assert_frame_equal(result, expected)