Add case_when API

ELHoussineT · ELHoussineT · commit 3b775e30b453 · 2023-01-06T11:57:10.000+01:00
* Used to support conditional assignment operations.
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -14,6 +14,26 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
+.. _whatsnew_200.enhancements.case_when:
+
+Assignment based on multiple conditions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``pd.case_when`` API has now been added to support assignment based on multiple conditions.
+
+.. ipython:: python
+
+   import pandas as pd
+
+   df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
+   df.assign(
+       new_column=pd.case_when(
+           lambda x: x.a == 1, 'first',
+           lambda x: (x.a > 1) & (x.b == 5), 'second',
+           default='default',
+       )
+   )
+
 .. _whatsnew_200.enhancements.optional_dependency_management_pip:
 
 Installing optional dependencies with pip extras
diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -72,6 +72,7 @@
     notnull,
     # indexes
     Index,
+    case_when,
     CategoricalIndex,
     RangeIndex,
     MultiIndex,
@@ -231,6 +232,7 @@
 __all__ = [
     "ArrowDtype",
     "BooleanDtype",
+    "case_when",
     "Categorical",
     "CategoricalDtype",
     "CategoricalIndex",
diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -42,6 +42,7 @@
     UInt64Dtype,
 )
 from pandas.core.arrays.string_ import StringDtype
+from pandas.core.case_when import case_when
 from pandas.core.construction import array
 from pandas.core.flags import Flags
 from pandas.core.groupby import (
@@ -84,11 +85,13 @@
 # DataFrame needs to be imported after NamedAgg to avoid a circular import
 from pandas.core.frame import DataFrame  # isort:skip
 
+
 __all__ = [
     "array",
     "ArrowDtype",
     "bdate_range",
     "BooleanDtype",
+    "case_when",
     "Categorical",
     "CategoricalDtype",
     "CategoricalIndex",
diff --git a/pandas/core/case_when.py b/pandas/core/case_when.py
@@ -0,0 +1,91 @@
+from __future__ import annotations
+
+from typing import (
+    Any,
+    Callable,
+)
+
+import numpy as np
+
+from pandas._libs import lib
+
+import pandas as pd
+import pandas.core.common as com
+
+
+def case_when(*args, default: Any = lib.no_default) -> Callable:
+    """
+    Create a callable for assignment based on a condition or multiple conditions.
+
+    This is useful when you want to assign a column based on multiple conditions.
+
+    Parameters
+    ----------
+    *args : alternating conditions and replacement values
+        Takes the form:
+            `condition0`, `value0`, `condition1`, `value1`, ...
+        `condition` can be a 1-D boolean array/series or a callable
+        that evaluates to a 1-D boolean array/series.
+    default : Any, default None
+        The value to be used where all conditions evaluate to False.
+
+    Returns
+    -------
+    Callable
+        A function taking a DataFrame and returning a NumPy array; it can be
+        passed to `df.assign(...)` for multi-condition assignment.
+
+    Raises
+    ------
+    ValueError
+        If fewer than two arguments are given or their number is odd.
+
+    See Also
+    --------
+    DataFrame.assign: Assign new columns to a DataFrame.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
+    >>> df
+       a  b
+    0  1  4
+    1  2  5
+    2  3  6
+
+    >>> df.assign(
+    ...     new_column = pd.case_when(
+    ...         lambda x: x.a == 1, 'first',
+    ...         lambda x: (x.a > 1) & (x.b == 5), 'second',
+    ...         default='default'
+    ...     )
+    ... )
+       a  b new_column
+    0  1  4      first
+    1  2  5     second
+    2  3  6    default
+    """
+    len_args = len(args)
+
+    if len_args < 2:
+        raise ValueError("At least two arguments are required for `case_when`")
+    if len_args % 2:
+        raise ValueError(
+            "The number of conditions and values do not match. "
+            f"There are {len_args - len_args//2} conditions "
+            f"and {len_args//2} values."
+        )
+
+    if default is lib.no_default:
+        default = None
+
+    # The condition/value pairing is fixed, so split it once up front.
+    conditions = args[::2]
+    replacements = list(args[1::2])
+
+    def _eval(df: pd.DataFrame) -> np.ndarray:
+        # apply_if_callable passes non-callable conditions through unchanged.
+        booleans = [com.apply_if_callable(cond, df) for cond in conditions]
+        return np.select(booleans, replacements, default=default)
+
+    return _eval
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
@@ -99,6 +99,7 @@ class TestPDApi(Base):
     funcs = [
         "array",
         "bdate_range",
+        "case_when",
         "concat",
         "crosstab",
         "cut",