diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ddff78c9d511f..129eddd3cc8d7 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -242,6 +242,80 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): - sort_categories=True and Categoricals are ordered ValueError Empty list of categoricals passed + + Examples + -------- + If you want to combine categoricals that do not necessarily have the same + categories, the union_categoricals function will combine a list-like of + categoricals. The new categories will be the union of the categories being + combined. + + >>> from pandas.api.types import union_categoricals + + >>> a = pd.Categorical(["b", "c"]) + + >>> b = pd.Categorical(["a", "b"]) + + >>> union_categoricals([a, b]) + [b, c, a, b] + Categories (3, object): [b, c, a] + + By default, the resulting categories will be ordered as they appear in the + data. If you want the categories to be lexsorted, pass the + sort_categories=True argument. + + >>> union_categoricals([a, b], sort_categories=True) + [b, c, a, b] + Categories (3, object): [a, b, c] + + union_categoricals also works with the "easy" case of combining two + categoricals of the same categories and order information (e.g. what you + could also use `append` for). + + >>> a = pd.Categorical(["a", "b"], ordered=True) + + >>> b = pd.Categorical(["a", "b", "a"], ordered=True) + + >>> union_categoricals([a, b]) + [a, b, a, b, a] + Categories (2, object): [a < b] + + The example below raises TypeError because the categories are ordered and + not identical. + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "c"], ordered=True) + >>> union_categoricals([a, b]) + TypeError: to union ordered Categoricals, all categories must be the same + + Ordered categoricals with different categories or orderings can be combined + by using the ignore_order=True argument. 
+ + >>> a = pd.Categorical(["a", "b", "c"], ordered=True) + + >>> b = pd.Categorical(["c", "b", "a"], ordered=True) + + >>> union_categoricals([a, b], ignore_order=True) + [a, b, c, c, b, a] + Categories (3, object): [a, b, c] + + union_categoricals also works with a CategoricalIndex, or Series containing + categorical data, but note that the resulting array will always be a plain + Categorical. + + >>> a = pd.Series(["b", "c"], dtype='category') + + >>> b = pd.Series(["a", "b"], dtype='category') + + >>> union_categoricals([a, b]) + [b, c, a, b] + Categories (3, object): [b, c, a] + + Notes + ----- + To learn more about categories, please see `this link + <https://pandas.pydata.org/pandas-docs/stable/categorical.html#unioning>`__ + """ from pandas import Index, Categorical, CategoricalIndex, Series