CSCD01 · fpunny · Mar 30, 2020 · Mar 31, 2020 · Mar 31, 2020 · Apr 4, 2020
diff --git a/Untitled.ipynb b/Untitled.ipynb
@@ -0,0 +1,81 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "          a         b         c         d  group\n",
+      "0  0.855664  0.237612  0.660391  0.896628      0\n",
+      "1  0.695109  0.026930  0.315360  0.784887      0\n",
+      "2  0.807515  0.301360  0.400504  0.055916      1\n",
+      "3  0.077397  0.571981  0.429654  0.180142      1\n",
+      "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f43485b0190>\n",
+      "group\n",
+      "0    0.618052\n",
+      "1    0.235534\n",
+      "dtype: float64\n",
+      "          a_sum    a_mean    b_mean     c_sum   d_range  diff_a_b\n",
+      "group                                                            \n",
+      "0      1.550773  0.775387  0.132271  0.975751  0.111741  0.618052\n",
+      "1      0.884912  0.442456  0.436670  0.830158  0.124226  0.235534\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "df = pd.DataFrame(np.random.rand(4,4), columns=list('abcd'))\n",
+    "df['group'] = [0, 0, 1, 1]\n",
+    "\n",
+    "print(df)\n",
+    "\n",
+    "print(df.groupby('group'))\n",
+    "\n",
+    "print(df.groupby('group')['a'].max() - df.groupby('group')['b'].max())\n",
+    "\n",
+    "print(df.groupby('group').agg(\n",
+    "    diff_a_b=(['a', 'b'], lambda x: x['a'].max() - x['b'].max()),\n",
+    "    a_sum=('a', 'sum'),\n",
+    "    a_mean=('a', 'mean'),\n",
+    "    b_mean=('b', 'mean'),\n",
+    "    c_sum=('c', 'sum'),\n",
+    "    d_range=('d', lambda x: x.max() - x.min()),\n",
+    "))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -680,6 +680,16 @@ must be either implemented on GroupBy or available via :ref:`dispatching
 
 .. _groupby.aggregate.cython:
 
+
+Aggregating multiple columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. ipython:: python
+
+   grouped.agg(diff_c_d=(['C', 'D'], lambda x: x['C'].max() - x['D'].max()))
+
+
+
 Cython-optimized aggregation functions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -316,6 +316,11 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
 - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`)
 
+Groupby Aggregations
+^^^^^^^^^^^^^^^^^^^^
+
+- Added functionality to :meth:`DataFrameGroupBy.agg` to perform aggregations on multiple columns
+
 Reshaping
 ^^^^^^^^^
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -352,8 +352,20 @@ def _aggregate(self, arg, *args, **kwargs):
                         raise SpecificationError("nested renamer is not supported")
                     elif isinstance(obj, ABCSeries):
                         raise SpecificationError("nested renamer is not supported")
-                    elif isinstance(obj, ABCDataFrame) and k not in obj.columns:
-                        raise KeyError(f"Column '{k}' does not exist!")
+                    elif isinstance(obj, ABCDataFrame):
+
+                        # OWO CHANGES
+                        # Original check
+                        if (k not in obj.columns):
+                            # k may be a byte-encoded list of 1-char column names
+                            try:
+                                keys = np.frombuffer(k, dtype=np.dtype('<U1'))
+                                for key in keys:
+                                    # Check keys
+                                    if (key not in obj.columns):
+                                        raise KeyError(f"Column '{key}' does not exist!")
+                            except TypeError:
+                                raise KeyError(f"Column '{k}' does not exist!")
 
                 arg = new_arg
 
@@ -393,7 +405,15 @@ def _agg(arg, func):
                 """
                 result = {}
                 for fname, agg_how in arg.items():
-                    result[fname] = func(fname, agg_how)
+                    # OWO CHANGES
+                    try:
+                        items = np.frombuffer(fname, dtype=np.dtype('<U1'))
+                        _obj = {}
+                        for item in items:
+                            _obj[item] = self._gotitem(item, ndim=1, subset=None)
+                        result[fname] = agg_how[0](_obj)
+                    except TypeError:
+                        result[fname] = func(fname, agg_how)
                 return result
 
             # set the final keys
@@ -424,11 +444,9 @@ def _agg(arg, func):
 
             # no selection
             else:
-
                 try:
                     result = _agg(arg, _agg_1dim)
                 except SpecificationError:
-
                     # we are aggregating expecting all 1d-returns
                     # but we have 2d
                     result = _agg(arg, _agg_2dim)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -908,11 +908,17 @@ class DataFrameGroupBy(GroupBy):
     )
     @Appender(_shared_docs["aggregate"])
     def aggregate(self, func=None, *args, **kwargs):
-
         relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
         if relabeling:
-            func, columns, order = normalize_keyword_aggregation(kwargs)
+            # OWO CHANGES
+            from types import LambdaType
+            for k, v in list(kwargs.items()):
+                if isinstance(v[0], list) & isinstance(v[1], LambdaType):
+                    # v[0]: the list of column(s) to aggregate
+                    # v[1]: the operation applied to those column(s)
+                    kwargs[k] = (np.array(v[0]).tobytes(),) + v[1:]
 
+            func, columns, order = normalize_keyword_aggregation(kwargs)
             kwargs = {}
         elif isinstance(func, list) and len(func) > len(set(func)):
 

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -610,6 +610,30 @@ def test_mangled(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_agg_multiple_columns(self):
+        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        result = df.groupby("A").agg(
+             add=(["B", "C"], lambda x: x["B"].max() + x["C"].min()),
+             minus=(["C", "B"], lambda x: x["B"].max() - x["C"].min())
+        )
+        expected = pd.DataFrame(
+            {"add": [5, 9], "minus": [-1, -1]}, index=pd.Index([0, 1], name="A")
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_multi_missing_column_raises(self):
+        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        with pytest.raises(KeyError, match="Column 'D' does not exist"):
+            df.groupby("A").agg(
+                 minus=(["D", "C"], lambda x: x["D"].max() - x["C"].min()),
+            )
+
+    def test_agg_multi_missing_key_raises(self):
+        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
+        with pytest.raises(KeyError, match="D"):
+            df.groupby("A").agg(
+                 minus=(["B", "C"], lambda x: x["D"].max() - x["D"].min()),
+            )
 
 @pytest.mark.parametrize(
     "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3",