diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..249703bedcc21 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4818,54 +4818,68 @@ def aggregate(self, func, axis=0, *args, **kwargs): def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, args=(), **kwds): - """Applies function along an axis of the DataFrame. + """ + Apply a function along an axis of the DataFrame. - Objects passed to functions are Series objects having index - either the DataFrame's index (axis=0) or the columns (axis=1). - Final return type depends on the return type of the applied function, - or on the `result_type` argument. + Objects passed to the function are Series objects whose index is + either the DataFrame's index (``axis=0``) or the DataFrame's columns + (``axis=1``). By default (``result_type=None``), the final return type + is inferred from the return type of the applied function. Otherwise, + it depends on the `result_type` argument. Parameters ---------- func : function - Function to apply to each column/row + Function to apply to each column or row. axis : {0 or 'index', 1 or 'columns'}, default 0 - * 0 or 'index': apply function to each column - * 1 or 'columns': apply function to each row - broadcast : boolean, optional - For aggregation functions, return object of same size with values - propagated + Axis along which the function is applied: + + * 0 or 'index': apply function to each column. + * 1 or 'columns': apply function to each row. + broadcast : bool, optional + Only relevant for aggregation functions: + + * ``False`` or ``None`` : returns a Series whose length is the + length of the index or the number of columns (based on the + `axis` parameter) + * ``True`` : results will be broadcast to the original shape + of the frame, the original index and columns will be retained. .. deprecated:: 0.23.0 This argument will be removed in a future version, replaced by result_type='broadcast'. 
- raw : boolean, default False - If False, convert each row or column into a Series. If raw=True the - passed function will receive ndarray objects instead. If you are - just applying a NumPy reduction function this will achieve much - better performance - reduce : boolean or None, default None + raw : bool, default False + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray objects + instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + reduce : bool or None, default None Try to apply reduction procedures. If the DataFrame is empty, - apply will use reduce to determine whether the result should be a - Series or a DataFrame. If reduce is None (the default), apply's - return value will be guessed by calling func an empty Series (note: - while guessing, exceptions raised by func will be ignored). If - reduce is True a Series will always be returned, and if False a - DataFrame will always be returned. + `apply` will use `reduce` to determine whether the result + should be a Series or a DataFrame. If ``reduce=None`` (the + default), `apply`'s return value will be guessed by calling + `func` on an empty Series + (note: while guessing, exceptions raised by `func` will be + ignored). + If ``reduce=True`` a Series will always be returned, and if + ``reduce=False`` a DataFrame will always be returned. .. deprecated:: 0.23.0 This argument will be removed in a future version, replaced - by result_type='reduce'. + by ``result_type='reduce'``. - result_type : {'expand', 'reduce', 'broadcast, None} - These only act when axis=1 {columns}: + result_type : {'expand', 'reduce', 'broadcast', None}, default None + These only act when ``axis=1`` (columns): * 'expand' : list-like results will be turned into columns. - * 'reduce' : return a Series if possible rather than expanding - list-like results. This is the opposite to 'expand'. 
+ * 'reduce' : returns a Series if possible rather than expanding + list-like results. This is the opposite of 'expand'. * 'broadcast' : results will be broadcast to the original shape - of the frame, the original index & columns will be retained. + of the DataFrame, the original index and columns will be + retained. The default behaviour (None) depends on the return value of the applied function: list-like results will be returned as a Series @@ -4875,61 +4889,56 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, .. versionadded:: 0.23.0 args : tuple - Positional arguments to pass to function in addition to the - array/series - Additional keyword arguments will be passed as keywords to the function + Positional arguments to pass to `func` in addition to the + array/series. + **kwds + Additional keyword arguments to pass as keyword arguments to + `func`. Notes ----- - In the current implementation apply calls func twice on the + In the current implementation apply calls `func` twice on the first column/row to decide whether it can take a fast or slow - code path. This can lead to unexpected behavior if func has + code path. This can lead to unexpected behavior if `func` has side-effects, as they will take effect twice for the first column/row. - Examples + See also -------- + DataFrame.applymap: For elementwise operations + DataFrame.aggregate: only perform aggregating type operations + DataFrame.transform: only perform transforming type operations - We use this DataFrame to illustrate + Examples + -------- - >>> df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, - ... 
columns=['A', 'B', 'C']) + >>> df = pd.DataFrame([[4, 9],] * 3, columns=['A', 'B']) >>> df - A B C - 0 1 2 3 - 1 1 2 3 - 2 1 2 3 - 3 1 2 3 - 4 1 2 3 - 5 1 2 3 + A B + 0 4 9 + 1 4 9 + 2 4 9 Using a numpy universal function (in this case the same as ``np.sqrt(df)``): >>> df.apply(np.sqrt) - A B C - 0 1.0 1.414214 1.732051 - 1 1.0 1.414214 1.732051 - 2 1.0 1.414214 1.732051 - 3 1.0 1.414214 1.732051 - 4 1.0 1.414214 1.732051 - 5 1.0 1.414214 1.732051 + A B + 0 2.0 3.0 + 1 2.0 3.0 + 2 2.0 3.0 Using a reducing function on either axis >>> df.apply(np.sum, axis=0) - A 6 - B 12 - C 18 + A 12 + B 27 dtype: int64 >>> df.apply(np.sum, axis=1) - 0 6 - 1 6 - 2 6 - 3 6 - 4 6 - 5 6 + 0 13 + 1 13 + 2 13 dtype: int64 Retuning a list-like will result in a Series @@ -4938,9 +4947,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 0 [1, 2] 1 [1, 2] 2 [1, 2] - 3 [1, 2] - 4 [1, 2] - 5 [1, 2] + dtype: object Passing result_type='expand' will expand list-like results to columns of a Dataframe @@ -4950,42 +4957,27 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 0 1 2 1 1 2 2 1 2 - 3 1 2 - 4 1 2 - 5 1 2 Returning a Series inside the function is similar to passing ``result_type='expand'``. The resulting column names will be the Series index. - >>> df.apply(lambda x: Series([1, 2], index=['foo', 'bar']), axis=1) + >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1) foo bar 0 1 2 1 1 2 2 1 2 - 3 1 2 - 4 1 2 - 5 1 2 Passing ``result_type='broadcast'`` will ensure the same shape result, whether list-like or scalar is returned by the function, and broadcast it along the axis. The resulting column names will be the originals. 
- >>> df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') - A B C - 0 1 2 3 - 1 1 2 3 - 2 1 2 3 - 3 1 2 3 - 4 1 2 3 - 5 1 2 3 - - See also - -------- - DataFrame.applymap: For elementwise operations - DataFrame.aggregate: only perform aggregating type operations - DataFrame.transform: only perform transformating type operations + >>> df.apply(lambda x: [1, 2], axis=1, result_type='broadcast') + A B + 0 1 2 + 1 1 2 + 2 1 2 Returns -------