diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 97cbecd4bb0e7..1cd53cd575650 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -62,3 +62,5 @@ Bug Fixes - Bug in slicing a multi-index with an empty list and at least one boolean indexer (:issue:`8781`) - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). - ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`). +- Bug in ``pd.infer_freq``/``DatetimeIndex.inferred_freq`` that prevented proper sub-daily frequency inference + when the index contained DST days (:issue:`8772`). diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 7cd286129e936..54b29b1641309 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -698,6 +698,8 @@ def __init__(self, index, warn=True): self.index = index self.values = np.asarray(index).view('i8') + # This moves the values, which are implicitly in UTC, to the + # index's timezone so they are in local time if hasattr(index,'tz'): if index.tz is not None: self.values = tslib.tz_convert(self.values, 'UTC', index.tz) @@ -712,10 +714,18 @@ def __init__(self, index, warn=True): @cache_readonly def deltas(self): return tslib.unique_deltas(self.values) + + @cache_readonly + def deltas_asi8(self): + return tslib.unique_deltas(self.index.asi8) @cache_readonly def is_unique(self): return len(self.deltas) == 1 + + @cache_readonly + def is_unique_asi8(self): + return len(self.deltas_asi8) == 1 def get_freq(self): if not self.is_monotonic or not self.index.is_unique: @@ -725,9 +735,12 @@ def get_freq(self): if _is_multiple(delta, _ONE_DAY): return self._infer_daily_rule() else: - # Possibly intraday frequency - if not self.is_unique: + # Possibly intraday frequency. Here we use the + # original .asi8 values as the modified values + # will not work around DST transitions. 
See #8772 + if not self.is_unique_asi8: return None + delta = self.deltas_asi8[0] if _is_multiple(delta, _ONE_HOUR): # Hours return _maybe_add_count('H', delta / _ONE_HOUR) diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index b251ae50e22d6..b84cdefe7009f 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -268,6 +268,24 @@ def test_infer_freq_tz(self): idx = DatetimeIndex(dates, tz=tz) self.assertEqual(idx.inferred_freq, expected) + def test_infer_freq_tz_transition(self): + # Tests for #8772 + date_pairs = [['2013-11-02', '2013-11-5'], #Fall DST + ['2014-03-08', '2014-03-11'], #Spring DST + ['2014-01-01', '2014-01-03']] #Regular Time + freqs = ['3H', '10T', '3601S', '3600001L', '3600000001U', '3600000000001N'] + + for tz in [None, 'Australia/Sydney', 'Asia/Tokyo', 'Europe/Paris', + 'US/Pacific', 'US/Eastern']: + for date_pair in date_pairs: + for freq in freqs: + idx = date_range(date_pair[0], date_pair[1], freq=freq, tz=tz) + print(idx) + self.assertEqual(idx.inferred_freq, freq) + + index = date_range("2013-11-03", periods=5, freq="3H").tz_localize("America/Chicago") + self.assertIsNone(index.inferred_freq) + def test_not_monotonic(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) rng = rng[::-1]