From 3436c782570caed6388aa3150947080db988e429 Mon Sep 17 00:00:00 2001 From: Mridul Seth Date: Fri, 1 Dec 2017 13:23:29 -0500 Subject: [PATCH 1/2] Add test for GH 18523 and add _tz_compare() to compare timezone of DateTimeIndex --- pandas/core/indexes/datetimes.py | 19 ++++++++++++++++-- .../indexes/datetimes/test_construction.py | 20 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 196c881f97526..15d55ca2b0ac5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1127,6 +1127,21 @@ def join(self, other, how='left', level=None, return_indexers=False, return Index.join(this, other, how=how, level=level, return_indexers=return_indexers, sort=sort) + def _tz_compare(self, other): + """ + Compare time zones of DatetimeIndex. + + Parameters + ---------- + other: DatetimeIndex + + Returns: + ------- + compare : Boolean + + """ + return str(self.tzinfo) == str(other.tzinfo) + def _maybe_utc_convert(self, other): this = self if isinstance(other, DatetimeIndex): @@ -1138,7 +1153,7 @@ def _maybe_utc_convert(self, other): raise TypeError('Cannot join tz-naive with tz-aware ' 'DatetimeIndex') - if self.tz != other.tz: + if not self._tz_compare(other): this = self.tz_convert('UTC') other = other.tz_convert('UTC') return this, other @@ -1243,7 +1258,7 @@ def __iter__(self): def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None - if self.tz != other.tz: + if not self._tz_compare(other): raise ValueError('Passed item and index have different timezone') return self._simple_new(result, name=name, freq=None, tz=self.tz) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index b59dd25ead57f..588d5968bc932 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ 
-443,6 +443,26 @@ def test_000constructor_resolution(self): assert idx.nanosecond[0] == t1.nanosecond + def test_concat(self): + idx1 = pd.date_range('2011-01-01', periods=3, freq='H', + tz='Europe/Paris') + idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H') + df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1) + df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2) + res = pd.concat([df1, df2], axis=1) + + assert str(res.index.tzinfo) == str(df1.index.tzinfo) + assert str(res.index.tzinfo) == str(df2.index.tzinfo) + + idx3 = pd.date_range('2011-01-01', periods=3, + freq='H', tz='Asia/Tokyo') + df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3) + res = pd.concat([df1, df3], axis=1) + + assert str(res.index.tzinfo) == 'UTC' + assert str(res.index.tzinfo) != str(df1.index.tzinfo) + assert str(res.index.tzinfo) != str(df3.index.tzinfo) + class TestTimeSeries(object): From 259d6d82109e5659b8920083658e0cab548e4f84 Mon Sep 17 00:00:00 2001 From: Mridul Seth Date: Fri, 1 Dec 2017 16:12:07 -0500 Subject: [PATCH 2/2] Address comments on the PR --- pandas/core/indexes/datetimes.py | 9 ++++- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/reshape/test_concat.py | 39 ++++++++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 15d55ca2b0ac5..fb3fdafce9c85 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1129,7 +1129,13 @@ def join(self, other, how='left', level=None, return_indexers=False, def _tz_compare(self, other): """ - Compare time zones of DatetimeIndex. + Compare string representations of timezones of two DatetimeIndex as + directly comparing equality is broken. The same timezone can be + represented as different instances of timezones. For example + `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and + `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially the same + timezones but aren't evaluated as such, but the string representation + for both of these is `'Europe/Paris'`. 
Parameters ---------- @@ -1140,6 +1146,7 @@ def _tz_compare(self, other): compare : Boolean """ + # GH 18523 return str(self.tzinfo) == str(other.tzinfo) def _maybe_utc_convert(self, other): diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a1287c3102b77..2f0beefbac5ce 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -892,7 +892,7 @@ def test_to_datetime_freq(self): xp = bdate_range('2000-1-1', periods=10, tz='UTC') rs = xp.to_datetime() assert xp.freq == rs.freq - assert xp.tzinfo == rs.tzinfo + assert xp._tz_compare(rs) def test_to_datetime_overflow(self): # gh-17637 diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index ae41502f237f1..c4f0b6a2b64f5 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1959,6 +1959,45 @@ def test_concat_order(self): expected = expected.sort_values() tm.assert_index_equal(result, expected) + def test_concat_datetime_timezone(self): + # GH 18523 + idx1 = pd.date_range('2011-01-01', periods=3, freq='H', + tz='Europe/Paris') + idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H') + df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1) + df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2) + res = pd.concat([df1, df2], axis=1) + + exp_idx = DatetimeIndex(['2011-01-01 00:00:00+01:00', + '2011-01-01 01:00:00+01:00', + '2011-01-01 02:00:00+01:00'], + freq='H' + ).tz_localize('UTC').tz_convert('Europe/Paris') + + exp = pd.DataFrame([[1, 1], [2, 2], [3, 3]], + index=exp_idx, columns=['a', 'b']) + + tm.assert_frame_equal(res, exp) + + idx3 = pd.date_range('2011-01-01', periods=3, + freq='H', tz='Asia/Tokyo') + df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3) + res = pd.concat([df1, df3], axis=1) + + exp_idx = DatetimeIndex(['2010-12-31 15:00:00+00:00', + '2010-12-31 16:00:00+00:00', + '2010-12-31 17:00:00+00:00', + '2010-12-31 23:00:00+00:00', 
+ '2011-01-01 00:00:00+00:00', + '2011-01-01 01:00:00+00:00'] + ).tz_localize('UTC') + + exp = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3], + [1, np.nan], [2, np.nan], [3, np.nan]], + index=exp_idx, columns=['a', 'b']) + + tm.assert_frame_equal(res, exp) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float'])