diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index f5926c2d011ee..06c93541a7783 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -196,6 +196,7 @@ Bug Fixes
 - Bug in Series 0-division with a float and integer operand dtypes (:issue:`7785`)
 - Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`)
 - Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`)
+- Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a DatetimeIndex (:issue:`7777`)
 - Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is reffered internally (:issue:`7748`)
 
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index cecbb407d0bd1..c130ed4fc52ba 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -62,6 +62,18 @@ def _ensure_encoding(encoding):
             encoding = _default_encoding
     return encoding
 
+def _set_tz(values, tz, preserve_UTC=False):
+    """ localize UTC-stored Index values to tz (as a list if preserve_UTC and tz is UTC) """
+    if tz is not None and isinstance(values, Index):
+        tz = _ensure_decoded(tz)
+        if values.tz is None:
+            values = values.tz_localize('UTC').tz_convert(tz)
+        if preserve_UTC:
+            if tslib.get_timezone(tz) == 'UTC':
+                values = list(values)
+
+    return values
+
 
 Term = Expr
 
@@ -1464,11 +1476,7 @@ def convert(self, values, nan_rep, encoding):
                 kwargs['freq'] = None
             self.values = Index(values, **kwargs)
 
-        # set the timezone if indicated
-        # we stored in utc, so reverse to local timezone
-        if self.tz is not None:
-            self.values = self.values.tz_localize(
-                'UTC').tz_convert(_ensure_decoded(self.tz))
+        self.values = _set_tz(self.values, self.tz)
 
         return self
 
@@ -3443,8 +3451,11 @@ def read_column(self, column, where=None, start=None, stop=None, **kwargs):
                 # column must be an indexable or a data column
                 c = getattr(self.table.cols, column)
                 a.set_info(self.info)
-                return Series(a.convert(c[start:stop], nan_rep=self.nan_rep,
-                                        encoding=self.encoding).take_data())
+                return Series(_set_tz(a.convert(c[start:stop],
+                                                nan_rep=self.nan_rep,
+                                                encoding=self.encoding
+                                                ).take_data(),
+                                      a.tz, True))
 
         raise KeyError("column [%s] not found in the table" % column)
 
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index c602e8ff1a888..8d7f007f0bda7 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4299,6 +4299,38 @@ def test_tseries_indices_frame(self):
             self.assertEqual(type(result.index), type(df.index))
             self.assertEqual(result.index.freq, df.index.freq)
 
+    def test_tseries_select_index_column(self):
+        # GH7777
+        # select_column on a UTC DatetimeIndex column used to
+        # lose the UTC tzinfo that was set before storing
+
+        # tz-naive index: the selected column must stay tz-naive
+        rng = date_range('1/1/2000', '1/30/2000')
+        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+        with ensure_clean_store(self.path) as store:
+            store.append('frame', frame)
+            result = store.select_column('frame', 'index')
+            self.assertEqual(rng.tz, DatetimeIndex(result.values).tz)
+
+        # UTC index: the tz must survive the store/select round trip
+        rng = date_range('1/1/2000', '1/30/2000', tz='UTC')
+        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+        with ensure_clean_store(self.path) as store:
+            store.append('frame', frame)
+            result = store.select_column('frame', 'index')
+            self.assertEqual(rng.tz, DatetimeIndex(result.values).tz)
+
+        # non-UTC tz (US/Eastern): same round-trip check as above
+        rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
+        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
+        with ensure_clean_store(self.path) as store:
+            store.append('frame', frame)
+            result = store.select_column('frame', 'index')
+            self.assertEqual(rng.tz, DatetimeIndex(result.values).tz)
+
     def test_unicode_index(self):
 
         unicode_values = [u('\u03c3'), u('\u03c3\u03c3')]