-
-
Notifications
You must be signed in to change notification settings - Fork 19k
PeriodIndex count & size fix #12874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PeriodIndex count & size fix #12874
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -653,9 +653,6 @@ def _convert_obj(self, obj): | |
# Cannot have multiple of periods, convert to timestamp | ||
self.kind = 'timestamp' | ||
|
||
if not len(obj): | ||
self.kind = 'timestamp' | ||
|
||
# convert to timestamp | ||
if not (self.kind is None or self.kind == 'period'): | ||
obj = obj.to_timestamp(how=self.convention) | ||
|
@@ -673,18 +670,15 @@ def aggregate(self, arg, *args, **kwargs): | |
def _get_new_index(self): | ||
""" return our new index """ | ||
ax = self.ax | ||
ax_attrs = ax._get_attributes_dict() | ||
ax_attrs['freq'] = self.freq | ||
obj = self._selected_obj | ||
|
||
if len(ax) == 0: | ||
new_index = PeriodIndex(data=[], **ax_attrs) | ||
return obj.reindex(new_index) | ||
|
||
start = ax[0].asfreq(self.freq, how=self.convention) | ||
end = ax[-1].asfreq(self.freq, how='end') | ||
values = [] | ||
else: | ||
start = ax[0].asfreq(self.freq, how=self.convention) | ||
end = ax[-1].asfreq(self.freq, how='end') | ||
|
||
values = period_range(start, end, freq=self.freq).values | ||
|
||
return period_range(start, end, **ax_attrs) | ||
return ax._shallow_copy(values, freq=self.freq) | ||
|
||
|
||
def _downsample(self, how, **kwargs): | ||
""" | ||
|
@@ -705,7 +699,7 @@ def _downsample(self, how, **kwargs): | |
|
||
new_index = self._get_new_index() | ||
if len(new_index) == 0: | ||
return self._wrap_result(new_index) | ||
return self._wrap_result(self._selected_obj.reindex(new_index)) | ||
|
||
# Start vs. end of period | ||
memb = ax.asfreq(self.freq, how=self.convention) | ||
|
@@ -718,6 +712,8 @@ def _downsample(self, how, **kwargs): | |
return self._groupby_and_aggregate(grouper, how) | ||
elif is_superperiod(ax.freq, self.freq): | ||
return self.asfreq() | ||
elif ax.freq == self.freq: | ||
return self.asfreq() | ||
|
||
raise ValueError('Frequency {axfreq} cannot be ' | ||
'resampled to {freq}'.format( | ||
|
@@ -743,23 +739,24 @@ def _upsample(self, method, limit=None): | |
|
||
ax = self.ax | ||
obj = self.obj | ||
|
||
new_index = self._get_new_index() | ||
if len(new_index) == 0: | ||
return self._wrap_result(new_index) | ||
|
||
if not is_superperiod(ax.freq, self.freq): | ||
return self.asfreq() | ||
if len(new_index) == 0: | ||
return self._wrap_result(self._selected_obj.reindex(new_index)) | ||
|
||
# Start vs. end of period | ||
memb = ax.asfreq(self.freq, how=self.convention) | ||
|
||
# Get the fill indexer | ||
indexer = memb.get_indexer(new_index, method=method, limit=limit) | ||
return self._wrap_result(_take_new_index(obj, | ||
indexer, | ||
new_index, | ||
axis=self.axis)) | ||
return self._wrap_result(_take_new_index( | ||
obj, indexer, new_index, axis=self.axis)) | ||
|
||
def _groupby_and_aggregate(self, grouper, how, *args, **kwargs): | ||
|
||
if grouper is None: | ||
return self._downsample(how, **kwargs) | ||
return super(PeriodIndexResampler, self)._groupby_and_aggregate( | ||
grouper, how, *args, **kwargs) | ||
|
||
|
||
class TimedeltaResampler(DatetimeIndexResampler): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,33 +3,34 @@ | |
from datetime import datetime, timedelta | ||
from functools import partial | ||
|
||
from pandas.compat import range, lrange, zip, product, OrderedDict | ||
import nose | ||
import numpy as np | ||
|
||
import pandas as pd | ||
import pandas.tseries.offsets as offsets | ||
import pandas.util.testing as tm | ||
from pandas import (Series, DataFrame, Panel, Index, isnull, | ||
notnull, Timestamp) | ||
|
||
from pandas.compat import range, lrange, zip, product, OrderedDict | ||
from pandas.core.base import SpecificationError | ||
from pandas.core.common import ABCSeries, ABCDataFrame | ||
from pandas.core.groupby import DataError | ||
from pandas.tseries.frequencies import MONTHS, DAYS | ||
from pandas.tseries.index import date_range | ||
from pandas.tseries.tdi import timedelta_range | ||
from pandas.tseries.offsets import Minute, BDay | ||
from pandas.tseries.period import period_range, PeriodIndex, Period | ||
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper, | ||
DatetimeIndexResampler) | ||
from pandas.tseries.frequencies import MONTHS, DAYS | ||
from pandas.core.common import ABCSeries, ABCDataFrame | ||
from pandas.core.base import SpecificationError | ||
|
||
import pandas.tseries.offsets as offsets | ||
import pandas as pd | ||
|
||
import nose | ||
|
||
from pandas.tseries.tdi import timedelta_range | ||
from pandas.util.testing import (assert_series_equal, assert_almost_equal, | ||
assert_frame_equal) | ||
import pandas.util.testing as tm | ||
|
||
bday = BDay() | ||
downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', | ||
'median', 'prod', 'ohlc'] | ||
upsample_methods = ['count', 'size'] | ||
series_methods = ['nunique'] | ||
resample_methods = downsample_methods + upsample_methods + series_methods | ||
|
||
|
||
class TestResampleAPI(tm.TestCase): | ||
|
@@ -95,12 +96,13 @@ def test_api_changes_v018(self): | |
self.assertRaises(ValueError, lambda: r.iat[0]) | ||
self.assertRaises(ValueError, lambda: r.ix[0]) | ||
self.assertRaises(ValueError, lambda: r.loc[ | ||
Timestamp('2013-01-01 00:00:00', offset='H')]) | ||
Timestamp('2013-01-01 00:00:00', offset='H')]) | ||
self.assertRaises(ValueError, lambda: r.at[ | ||
Timestamp('2013-01-01 00:00:00', offset='H')]) | ||
Timestamp('2013-01-01 00:00:00', offset='H')]) | ||
|
||
def f(): | ||
r[0] = 5 | ||
|
||
self.assertRaises(ValueError, f) | ||
|
||
# str/repr | ||
|
@@ -144,7 +146,6 @@ def f(): | |
|
||
# comparison ops | ||
for op in ['__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__']: | ||
|
||
r = self.series.resample('H') | ||
|
||
with tm.assert_produces_warning(FutureWarning, | ||
|
@@ -259,6 +260,7 @@ def test_attribute_access(self): | |
# setting | ||
def f(): | ||
r.F = 'bah' | ||
|
||
self.assertRaises(ValueError, f) | ||
|
||
def test_api_compat_before_use(self): | ||
|
@@ -509,10 +511,10 @@ def test_agg_misc(self): | |
# errors | ||
# invalid names in the agg specification | ||
for t in [r, g]: | ||
|
||
def f(): | ||
r[['A']].agg({'A': ['sum', 'std'], | ||
'B': ['mean', 'std']}) | ||
|
||
self.assertRaises(SpecificationError, f) | ||
|
||
def test_agg_nested_dicts(self): | ||
|
@@ -679,7 +681,7 @@ def _ohlc(group): | |
assert_series_equal(result, expected) | ||
except BaseException as exc: | ||
|
||
exc.args += ('how=%s' % arg, ) | ||
exc.args += ('how=%s' % arg,) | ||
raise | ||
|
||
def test_resample_how_callables(self): | ||
|
@@ -692,7 +694,6 @@ def fn(x, a=1): | |
return str(type(x)) | ||
|
||
class fn_class: | ||
|
||
def __call__(self, x): | ||
return str(type(x)) | ||
|
||
|
@@ -768,7 +769,7 @@ def test_resample_rounding(self): | |
|
||
from pandas.compat import StringIO | ||
df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [ | ||
'date', 'time']}, index_col='timestamp') | ||
'date', 'time']}, index_col='timestamp') | ||
df.index.name = None | ||
result = df.resample('6s').sum() | ||
expected = DataFrame({'value': [ | ||
|
@@ -1061,10 +1062,10 @@ def test_resample_ohlc_dataframe(self): | |
|
||
df.columns = [['a', 'b'], ['c', 'd']] | ||
res = df.resample('H').ohlc() | ||
exp.columns = pd.MultiIndex.from_tuples([('a', 'c', 'open'), ( | ||
'a', 'c', 'high'), ('a', 'c', 'low'), ('a', 'c', 'close'), ( | ||
'b', 'd', 'open'), ('b', 'd', 'high'), ('b', 'd', 'low'), ( | ||
'b', 'd', 'close')]) | ||
exp.columns = pd.MultiIndex.from_tuples([ | ||
('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'), | ||
('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'), | ||
('b', 'd', 'low'), ('b', 'd', 'close')]) | ||
assert_frame_equal(exp, res) | ||
|
||
# dupe columns fail atm | ||
|
@@ -1449,11 +1450,12 @@ def test_resample_anchored_multiday(self): | |
# | ||
# See: https://github.com/pydata/pandas/issues/8683 | ||
|
||
s = pd.Series(np.random.randn(5), | ||
index=pd.date_range('2014-10-14 23:06:23.206', | ||
periods=3, freq='400L') | | ||
pd.date_range('2014-10-15 23:00:00', | ||
periods=2, freq='2200L')) | ||
index = pd.date_range( | ||
'2014-10-14 23:06:23.206', periods=3, freq='400L' | ||
) | pd.date_range( | ||
'2014-10-15 23:00:00', periods=2, freq='2200L') | ||
|
||
s = pd.Series(np.random.randn(5), index=index) | ||
|
||
# Ensure left closing works | ||
result = s.resample('2200L').mean() | ||
|
@@ -1763,7 +1765,6 @@ def _simple_pts(start, end, freq='D'): | |
|
||
|
||
class TestResamplePeriodIndex(tm.TestCase): | ||
|
||
_multiprocess_can_split_ = True | ||
|
||
def test_annual_upsample_D_s_f(self): | ||
|
@@ -1907,16 +1908,40 @@ def test_resample_basic(self): | |
|
||
def test_resample_empty(self): | ||
|
||
# GH12771 | ||
# GH12771 & GH12868 | ||
index = PeriodIndex(start='2000', periods=0, freq='D', name='idx') | ||
s = Series(index=index) | ||
result = s.resample('M').sum() | ||
|
||
# after GH12774 is resolved, this should be a PeriodIndex | ||
expected_index = DatetimeIndex([], name='idx') | ||
expected_index = PeriodIndex([], name='idx', freq='M') | ||
expected = Series(index=expected_index) | ||
|
||
for method in resample_methods: | ||
result = getattr(s.resample('M'), method)() | ||
assert_series_equal(result, expected) | ||
|
||
def test_resample_count(self): | ||
|
||
# GH12774 | ||
series = pd.Series(1, index=pd.period_range(start='2000', | ||
periods=100)) | ||
result = series.resample('M').count() | ||
|
||
expected_index = pd.period_range(start='2000', freq='M', periods=4) | ||
expected = pd.Series([31, 29, 31, 9], index=expected_index) | ||
|
||
assert_series_equal(result, expected) | ||
|
||
def test_resample_same_freq(self): | ||
|
||
|
||
# GH12770 | ||
series = pd.Series(range(3), index=pd.period_range( | ||
start='2000', periods=3, freq='M')) | ||
expected = series | ||
|
||
for method in resample_methods: | ||
result = getattr(series.resample('M'), method)() | ||
assert_series_equal(result, expected) | ||
|
||
def test_with_local_timezone_pytz(self): | ||
# GH5430 | ||
tm._skip_if_no_pytz() | ||
|
@@ -2493,8 +2518,8 @@ def test_aggregate_with_nat(self): | |
# GH 9925 | ||
self.assertEqual(dt_result.index.name, 'key') | ||
|
||
# if NaT is included, 'var', 'std', 'mean', 'first','last' and 'nth' | ||
# doesn't work yet | ||
# if NaT is included, 'var', 'std', 'mean', 'first','last' | ||
# and 'nth' doesn't work yet | ||
|
||
|
||
if __name__ == '__main__': | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so this doesn't break the test, I suspect if you did
how=self.convention
it would work no?or maybe just ignore convention entirely and make:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's the suggestion here, but it breaks that test above. Or am I misunderstanding?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually it I think your change makes sense. make a new sub-section on resampling with PeriodIndex (and put your bug fixes there, with a small example which shows this change).
As an aside, I think
convention
here is really refering to something completely different. IOW it has to do whether to take the start or end of the original series as the counting point for the freq, NOT the period resampling which is independent of that.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unless you strongly object I'm going to add an issue on this & follow up; I'm already a bit overextended on this and don't want to leave the stuff completed so far hanging around.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no that's fine