Skip to content

Do not convert series when looking at sub-daily frequencies. #8782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 14, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,5 @@ Bug Fixes
- Bug in slicing a multi-index with an empty list and at least one boolean indexer (:issue:`8781`)
- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`).
- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
- Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
when the index contained DST days (:issue:`8772`).
17 changes: 15 additions & 2 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,8 @@ def __init__(self, index, warn=True):
self.index = index
self.values = np.asarray(index).view('i8')

# This moves the values, which are implicitly in UTC, to the
# the timezone so they are in local time
if hasattr(index,'tz'):
if index.tz is not None:
self.values = tslib.tz_convert(self.values, 'UTC', index.tz)
Expand All @@ -712,10 +714,18 @@ def __init__(self, index, warn=True):
@cache_readonly
def deltas(self):
return tslib.unique_deltas(self.values)

@cache_readonly
def deltas_asi8(self):
return tslib.unique_deltas(self.index.asi8)

@cache_readonly
def is_unique(self):
return len(self.deltas) == 1

@cache_readonly
def is_unique_asi8(self):
return len(self.deltas_asi8) == 1

def get_freq(self):
if not self.is_monotonic or not self.index.is_unique:
Expand All @@ -725,9 +735,12 @@ def get_freq(self):
if _is_multiple(delta, _ONE_DAY):
return self._infer_daily_rule()
else:
# Possibly intraday frequency
if not self.is_unique:
# Possibly intraday frequency. Here we use the
# original .asi8 values as the modified values
# will not work around DST transitions. See #8772
if not self.is_unique_asi8:
return None
delta = self.deltas_asi8[0]
if _is_multiple(delta, _ONE_HOUR):
# Hours
return _maybe_add_count('H', delta / _ONE_HOUR)
Expand Down
18 changes: 18 additions & 0 deletions pandas/tseries/tests/test_frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,24 @@ def test_infer_freq_tz(self):
idx = DatetimeIndex(dates, tz=tz)
self.assertEqual(idx.inferred_freq, expected)

def test_infer_freq_tz_transition(self):
# Tests for #8772
date_pairs = [['2013-11-02', '2013-11-5'], #Fall DST
['2014-03-08', '2014-03-11'], #Spring DST
['2014-01-01', '2014-01-03']] #Regular Time
freqs = ['3H', '10T', '3601S', '3600001L', '3600000001U', '3600000000001N']

for tz in [None, 'Australia/Sydney', 'Asia/Tokyo', 'Europe/Paris',
'US/Pacific', 'US/Eastern']:
for date_pair in date_pairs:
for freq in freqs:
idx = date_range(date_pair[0], date_pair[1], freq=freq, tz=tz)
print(idx)
self.assertEqual(idx.inferred_freq, freq)

index = date_range("2013-11-03", periods=5, freq="3H").tz_localize("America/Chicago")
self.assertIsNone(index.inferred_freq)

def test_not_monotonic(self):
rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002'])
rng = rng[::-1]
Expand Down