|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from typing import ( |
| 4 | + Any, |
| 5 | + Callable, |
| 6 | +) |
| 7 | + |
| 8 | +import numpy as np |
| 9 | + |
| 10 | +from pandas._libs import lib |
| 11 | + |
| 12 | +import pandas as pd |
| 13 | +import pandas.core.common as com |
| 14 | + |
| 15 | + |
def case_when(*args, default: Any = lib.no_default) -> Callable:
    """
    Create a callable for assignment based on a condition or multiple conditions.

    This is useful when you want to assign a column based on multiple conditions.

    Parameters
    ----------
    *args : Variable argument of conditions and expected values.
        Takes the form:
            `condition0`, `value0`, `condition1`, `value1`, ...
        `condition` can be a 1-D boolean array/series or a callable
        that evaluates to a 1-D boolean array/series. Callables are
        invoked with the DataFrame passed to the returned callable.
        Values are handed to ``numpy.select`` unchanged.
    default : Any, default None
        The default value to be used if all conditions evaluate False.

    Returns
    -------
    Callable
        The Callable returned in `case_when` can be used with `df.assign(...)`
        for multi-condition assignment. See examples below for more info.

    Raises
    ------
    ValueError
        If fewer than two positional arguments are given, or if the number
        of positional arguments is odd (a condition without a value).

    See Also
    --------
    DataFrame.assign: Assign new columns to a DataFrame.

    Examples
    --------
    >>> df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
    >>> df
       a  b
    0  1  4
    1  2  5
    2  3  6

    >>> df.assign(
    ...     new_column = pd.case_when(
    ...         lambda x: x.a == 1, 'first',
    ...         lambda x: (x.a > 1) & (x.b == 5), 'second',
    ...         default='default'
    ...     )
    ... )
       a  b new_column
    0  1  4      first
    1  2  5     second
    2  3  6    default
    """
    len_args = len(args)

    if len_args < 2:
        raise ValueError("At least two arguments are required for `case_when`")
    if len_args % 2:
        raise ValueError(
            "The number of conditions and values do not match. "
            f"There are {len_args - len_args//2} conditions "
            f"and {len_args//2} values."
        )

    if default is lib.no_default:
        default = None

    # Even positions hold conditions, odd positions hold the matching values.
    # The split does not depend on the DataFrame, so do it once up front
    # rather than on every invocation of the returned callable.
    conditions = args[::2]
    replacements = list(args[1::2])

    def _eval(df: pd.DataFrame) -> np.ndarray:
        # apply_if_callable returns non-callables unchanged, so no separate
        # callable() guard is needed here.
        booleans = [com.apply_if_callable(condition, df) for condition in conditions]
        return np.select(booleans, replacements, default=default)

    return _eval
0 commit comments