-
Notifications
You must be signed in to change notification settings - Fork 0
Owo #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Owo #1
Changes from all commits
d3b44fd
f6fa564
1f2d1f0
61d657c
9cbe760
1a08406
93a7a49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" a b c d group\n", | ||
"0 0.855664 0.237612 0.660391 0.896628 0\n", | ||
"1 0.695109 0.026930 0.315360 0.784887 0\n", | ||
"2 0.807515 0.301360 0.400504 0.055916 1\n", | ||
"3 0.077397 0.571981 0.429654 0.180142 1\n", | ||
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f43485b0190>\n", | ||
"group\n", | ||
"0 0.618052\n", | ||
"1 0.235534\n", | ||
"dtype: float64\n", | ||
" a_sum a_mean b_mean c_sum d_range diff_a_b\n", | ||
"group \n", | ||
"0 1.550773 0.775387 0.132271 0.975751 0.111741 0.618052\n", | ||
"1 0.884912 0.442456 0.436670 0.830158 0.124226 0.235534\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"df = pd.DataFrame(np.random.rand(4,4), columns=list('abcd'))\n", | ||
"df['group'] = [0, 0, 1, 1]\n", | ||
"\n", | ||
"print(df)\n", | ||
"\n", | ||
"print(df.groupby('group'))\n", | ||
"\n", | ||
"print(df.groupby('group')['a'].max() - df.groupby('group')['b'].max())\n", | ||
"\n", | ||
"print(df.groupby('group').agg(\n", | ||
" diff_a_b=(['a', 'b'], lambda x: x['a'].max() - x['b'].max())\n", | ||
" a_sum=('a', 'sum'),\n", | ||
" a_mean=('a', 'mean'),\n", | ||
" b_mean=('b', 'mean'),\n", | ||
" c_sum=('c', 'sum'),\n", | ||
" d_range=('d', lambda x: x.max() - x.min()),\n", | ||
"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -352,8 +352,20 @@ def _aggregate(self, arg, *args, **kwargs): | |
raise SpecificationError("nested renamer is not supported") | ||
elif isinstance(obj, ABCSeries): | ||
raise SpecificationError("nested renamer is not supported") | ||
elif isinstance(obj, ABCDataFrame) and k not in obj.columns: | ||
raise KeyError(f"Column '{k}' does not exist!") | ||
elif isinstance(obj, ABCDataFrame): | ||
|
||
# OWO CHANGES | ||
# Original check | ||
if (k not in obj.columns): | ||
# Check if list thingy | ||
try: | ||
keys = np.frombuffer(k, dtype=np.dtype('<U1')) | ||
for key in keys: | ||
# Check keys | ||
if (key not in obj.columns): | ||
raise KeyError(f"Column '{key}' does not exist!") | ||
except TypeError: | ||
raise KeyError(f"Column '{k}' does not exist!") | ||
|
||
arg = new_arg | ||
|
||
|
@@ -393,7 +405,15 @@ def _agg(arg, func): | |
""" | ||
result = {} | ||
for fname, agg_how in arg.items(): | ||
result[fname] = func(fname, agg_how) | ||
# OWO CHANGES | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps encapsulate the changes in a new agg function to keep some consistency (agg_multi_d1, or something similar). |
||
try: | ||
items = np.frombuffer(fname, dtype=np.dtype('<U1')) | ||
_obj = {} | ||
for item in items: | ||
_obj[item] = self._gotitem(item, ndim=1, subset=None) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is kinda bad... but hopefully, with a formal PR, we can get feedback on how to do this better (maybe a pipeline method would work better if we are to keep internal consistency). |
||
result[fname] = agg_how[0](_obj) | ||
except TypeError: | ||
result[fname] = func(fname, agg_how) | ||
return result | ||
|
||
# set the final keys | ||
|
@@ -424,11 +444,9 @@ def _agg(arg, func): | |
|
||
# no selection | ||
else: | ||
|
||
try: | ||
result = _agg(arg, _agg_1dim) | ||
except SpecificationError: | ||
|
||
# we are aggregating expecting all 1d-returns | ||
# but we have 2d | ||
result = _agg(arg, _agg_2dim) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -908,11 +908,17 @@ class DataFrameGroupBy(GroupBy): | |
) | ||
@Appender(_shared_docs["aggregate"]) | ||
def aggregate(self, func=None, *args, **kwargs): | ||
|
||
relabeling = func is None and is_multi_agg_with_relabel(**kwargs) | ||
if relabeling: | ||
func, columns, order = normalize_keyword_aggregation(kwargs) | ||
# OWO CHANGES | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should make this into a new normalize_keyword_aggregation. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While we're at it, check whether the second field of the tuple is a lambda, since native aggregation methods obviously wouldn't work. |
||
from types import LambdaType | ||
for k, v in list(kwargs.items()): | ||
if isinstance(v[0], list) & isinstance(v[1], LambdaType): | ||
# v[0] is the first parameter given (the column(s) to group) | ||
# v[1] is the 2nd parameter given and the operation to be done to the column(s) | ||
kwargs[k] = (np.array(v[0]).tobytes(),) + v[1:] | ||
|
||
func, columns, order = normalize_keyword_aggregation(kwargs) | ||
kwargs = {} | ||
elif isinstance(func, list) and len(func) > len(set(func)): | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
might want to delete this or convert it into a test case