-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Groupbydocs #8231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Groupbydocs #8231
Changes from 7 commits
ed8145b
9d910f2
a77bc2d
70ecbaa
1b4536b
d076afe
a12bef6
6d31eb7
cab5bb0
295120b
0eb9023
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -533,7 +533,7 @@ def wrapper(*args, **kwargs): | |
# a little trickery for aggregation functions that need an axis | ||
# argument | ||
kwargs_with_axis = kwargs.copy() | ||
if 'axis' not in kwargs_with_axis: | ||
if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis']==None: | ||
kwargs_with_axis['axis'] = self.axis | ||
|
||
def curried_with_axis(x): | ||
|
@@ -2133,9 +2133,105 @@ def _convert_grouper(axis, grouper): | |
else: | ||
return grouper | ||
|
||
from inspect import getargspec | ||
def _make_signature(func) : | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you move this to |
||
""" | ||
Returns a string repr of the arg list of a func call, with any defaults | ||
|
||
Examples | ||
-------- | ||
|
||
>>> def f(a,b,c=2) : | ||
>>> return a*b*c | ||
>>> print(_make_signature(f)) | ||
a,b,c=2 | ||
""" | ||
spec = getargspec(func) | ||
if spec.defaults == None : | ||
n_wo_defaults = len(spec.args) | ||
defaults = ('',) * n_wo_defaults | ||
else : | ||
n_wo_defaults = len(spec.args) - len(spec.defaults) | ||
defaults = ('',) * n_wo_defaults + spec.defaults | ||
args = [] | ||
for i, (var, default) in enumerate(zip(spec.args, defaults)) : | ||
args.append(var if default=='' else var+'='+repr(default)) | ||
return args, spec.args | ||
|
||
def _whitelist_method_generator(klass, whitelist):
    """
    Yield source text defining GroupBy wrappers for the names in whitelist.

    Parameters
    ----------
    klass : class
        Class where the members are defined. Should be Series or DataFrame.
    whitelist : list of str
        Names of klass attributes for which wrappers are constructed.

    Yields
    ------
    str
        A string suitable for exec'ing that defines the implementation of
        one named member for DataFrameGroupBy or SeriesGroupBy.

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any name that GroupBy already has is skipped.

    The attribute's docstring is interpolated verbatim into the generated
    source, so a docstring containing a triple double-quote would end the
    generated docstring early.
    """
    # NOTE(review): the indentation inside these templates is reconstructed
    # (four spaces for the generated body) - confirm against the real file.
    method_wrapper_template = \
        """def %(name)s(%(sig)s) :
    \"""
    %(doc)s
    \"""
    f = %(self)s.__getattr__('%(name)s')
    return f(%(args)s)"""
    property_wrapper_template = \
        """@property
def %(name)s(self) :
    \"""
    %(doc)s
    \"""
    return self.__getattr__('%(name)s')"""

    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(GroupBy, name):
            continue
        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__
        doc = doc if isinstance(doc, str) else ''
        if isinstance(f, types.MethodType):
            wrapper_template = method_wrapper_template
            decl, args = _make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            params = {'name': name,
                      'doc': doc,
                      'sig': ','.join(decl),
                      'self': args[0],
                      'args': ','.join(args_by_name)}
        else:
            # Anything that is not a method object gets the property wrapper.
            wrapper_template = property_wrapper_template
            params = {'name': name, 'doc': doc}
        yield wrapper_template % params
|
||
class SeriesGroupBy(GroupBy): | ||
# | ||
# Make class defs of attributes on SeriesGroupBy whitelist | ||
_apply_whitelist = _series_apply_whitelist | ||
|
||
for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) : | ||
try : | ||
exec(_def_str) | ||
except SyntaxError as e : | ||
print('-'*80) | ||
print(_def_str) | ||
print('-'*80) | ||
print(e) | ||
raise e | ||
|
||
def aggregate(self, func_or_funcs, *args, **kwargs): | ||
""" | ||
Apply aggregation function or functions to groups, yielding most likely | ||
|
@@ -3045,6 +3141,16 @@ def filter(self, func, dropna=True, *args, **kwargs): | |
|
||
class DataFrameGroupBy(NDFrameGroupBy): | ||
_apply_whitelist = _dataframe_apply_whitelist | ||
# | ||
# Make class defs of attributes on DataFrameGroupBy whitelist. | ||
for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) : | ||
try : | ||
exec(_def_str) | ||
except SyntaxError as e : | ||
print('-'*80) | ||
print(_def_str) | ||
print('-'*80) | ||
raise e | ||
|
||
_block_agg_axis = 1 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
assert_index_equal, assertRaisesRegexp) | ||
from pandas.compat import( | ||
range, long, lrange, StringIO, lmap, lzip, map, | ||
zip, builtins, OrderedDict | ||
zip, builtins, OrderedDict, product as cart_product | ||
) | ||
from pandas import compat | ||
from pandas.core.panel import Panel | ||
|
@@ -4328,6 +4328,36 @@ def test_groupby_whitelist(self): | |
self.assertEqual(whitelist, gb._apply_whitelist) | ||
for m in whitelist: | ||
getattr(gb, m) | ||
# Also make sure that the class itself has | ||
# the method defined (dtypes is not a method) | ||
if m not in ['dtypes'] : | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this here? |
||
self.assertTrue(hasattr(type(gb), m)) | ||
|
||
def test_regression_whitelist_methods(self):
    """
    Compare whitelisted reductions on a level-grouped frame with the frame's
    own method.

    For 'skew' and 'mad', every combination of level (0, 1), axis (0, 1) and
    skipna (True, False) is run twice: once through
    ``frame.groupby(level=..., axis=...)`` and once through the DataFrame
    method called with ``level=``, ``axis=`` and ``skipna=``. The two results
    must be equal frames.
    """
    levels = [['foo', 'bar', 'baz', 'qux'],
              ['one', 'two', 'three']]
    labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=levels, labels=labels,
                       names=['first', 'second'])
    columns = Index(['A', 'B', 'C'], name='exp')
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=columns)
    # Put NaNs into the data so that skipna=True and skipna=False differ.
    raw_frame.ix[1, [1, 2]] = np.nan
    raw_frame.ix[7, [0, 1]] = np.nan

    combos = cart_product(['skew', 'mad'], lrange(2), lrange(2),
                          [True, False])
    for op, level, axis, skipna in combos:
        # For axis=1 the MultiIndex has to sit on the columns.
        frame = raw_frame if axis == 0 else raw_frame.T

        grouped = frame.groupby(level=level, axis=axis)
        result = getattr(grouped, op)(skipna=skipna)
        expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna)
        assert_frame_equal(result, expected)
|
||
def test_groupby_blacklist(self): | ||
from string import ascii_lowercase | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you move this function inside _whitelist_method_generator? it's only called from there, yes? (and it's cleaner that way)