diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 606bd4cc3b52d..95bd757f1994e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9306,7 +9306,6 @@ def _AXIS_NAMES(self) -> Dict[int, str]: DataFrame._add_numeric_operations() -DataFrame._add_series_or_dataframe_operations() ops.add_flex_arithmetic_methods(DataFrame) ops.add_special_arithmetic_methods(DataFrame) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fea3efedb6abb..8bdf0861175b2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6,7 +6,6 @@ import operator import pickle import re -from textwrap import dedent from typing import ( TYPE_CHECKING, Any, @@ -101,17 +100,22 @@ from pandas.core.missing import find_valid_index from pandas.core.ops import _align_method_FRAME from pandas.core.shared_docs import _shared_docs +from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window from pandas.io.formats import format as fmt from pandas.io.formats.format import DataFrameFormatter, format_percentiles from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: + from pandas._libs.tslibs import BaseOffset + from pandas.core.resample import Resampler from pandas.core.series import Series # noqa: F401 + from pandas.core.window.indexers import BaseIndexer # goal is to be able to define the docs close to function, while still being # able to share +_shared_docs = {**_shared_docs} _shared_doc_kwargs = dict( axes="keywords for axes", klass="Series/DataFrame", @@ -5127,51 +5131,6 @@ def pipe(self, func, *args, **kwargs): """ return com.pipe(self, func, *args, **kwargs) - _shared_docs["aggregate"] = dedent( - """ - Aggregate using one or more operations over the specified axis. - {versionadded} - Parameters - ---------- - func : function, str, list or dict - Function to use for aggregating the data. If a function, must either - work when passed a {klass} or when passed to {klass}.apply. 
- - Accepted combinations are: - - - function - - string function name - - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. - {axis} - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - scalar, Series or DataFrame - - The return can be: - - * scalar : when Series.agg is called with single function - * Series : when DataFrame.agg is called with a single function - * DataFrame : when DataFrame.agg is called with several functions - - Return scalar, Series or DataFrame. - {see_also} - Notes - ----- - `agg` is an alias for `aggregate`. Use the alias. - - In pandas, agg, as most operations just ignores the missing values, - and returns the operation only considering the values that are present. - - A passed user-defined-function will be passed a Series for evaluation. - {examples}""" - ) - # ---------------------------------------------------------------------- # Attribute access @@ -7448,77 +7407,6 @@ def clip( return result - _shared_docs[ - "groupby" - ] = """ - Group %(klass)s using a mapper or by a Series of columns. - - A groupby operation involves some combination of splitting the - object, applying a function, and combining the results. This can be - used to group large amounts of data and compute operations on these - groups. - - Parameters - ---------- - by : mapping, function, label, or list of labels - Used to determine the groups for the groupby. - If ``by`` is a function, it's called on each value of the object's - index. If a dict or Series is passed, the Series or dict VALUES - will be used to determine the groups (the Series' values are first - aligned; see ``.align()`` method). If an ndarray is passed, the - values are used as-is determine the groups. A label or list of - labels may be passed to group by the columns in ``self``. Notice - that a tuple is interpreted as a (single) key. 
- axis : {0 or 'index', 1 or 'columns'}, default 0 - Split along rows (0) or columns (1). - level : int, level name, or sequence of such, default None - If the axis is a MultiIndex (hierarchical), group by a particular - level or levels. - as_index : bool, default True - For aggregated output, return object with group labels as the - index. Only relevant for DataFrame input. as_index=False is - effectively "SQL-style" grouped output. - sort : bool, default True - Sort group keys. Get better performance by turning this off. - Note this does not influence the order of observations within each - group. Groupby preserves the order of rows within each group. - group_keys : bool, default True - When calling apply, add group keys to index to identify pieces. - squeeze : bool, default False - Reduce the dimensionality of the return type if possible, - otherwise return a consistent type. - - .. deprecated:: 1.1.0 - - observed : bool, default False - This only applies if any of the groupers are Categoricals. - If True: only show observed values for categorical groupers. - If False: show all values for categorical groupers. - - .. versionadded:: 0.23.0 - dropna : bool, default True - If True, and if group keys contain NA values, NA values together - with row/column will be dropped. - If False, NA values will also be treated as the key in groups - - .. versionadded:: 1.1.0 - - Returns - ------- - %(klass)sGroupBy - Returns a groupby object that contains information about the groups. - - See Also - -------- - resample : Convenience method for frequency conversion and resampling - of time series. - - Notes - ----- - See the `user guide - <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more. - """ - def asfreq( self: FrameOrSeries, freq, @@ -8427,35 +8315,6 @@ def ranker(data): return ranker(data) - _shared_docs[ - "compare" - ] = """ - Compare to another %(klass)s and show the differences. - - .. versionadded:: 1.1.0 - - Parameters - ---------- - other : %(klass)s - Object to compare with. 
- - align_axis : {0 or 'index', 1 or 'columns'}, default 1 - Determine which axis to align the comparison on. - - * 0, or 'index' : Resulting differences are stacked vertically - with rows drawn alternately from self and other. - * 1, or 'columns' : Resulting differences are aligned horizontally - with columns drawn alternately from self and other. - - keep_shape : bool, default False - If true, all rows and columns are kept. - Otherwise, only the ones with different values are kept. - - keep_equal : bool, default False - If true, the result keeps values that are equal. - Otherwise, equal values are shown as NaNs. - """ - @Appender(_shared_docs["compare"] % _shared_doc_kwargs) def compare( self, @@ -10585,45 +10444,21 @@ def mad(self, axis=None, skipna=None, level=None): examples=_min_examples, ) - @classmethod - def _add_series_or_dataframe_operations(cls): - """ - Add the series or dataframe only operations to the cls; evaluate - the doc strings again. - """ - from pandas.core.window import ( - Expanding, - ExponentialMovingWindow, - Rolling, - Window, - ) - - @doc(Rolling) - def rolling( - self, - window, - min_periods=None, - center=False, - win_type=None, - on=None, - axis=0, - closed=None, - ): - axis = self._get_axis_number(axis) - - if win_type is not None: - return Window( - self, - window=window, - min_periods=min_periods, - center=center, - win_type=win_type, - on=on, - axis=axis, - closed=closed, - ) + @doc(Rolling) + def rolling( + self, + window: "Union[int, timedelta, BaseOffset, BaseIndexer]", + min_periods: Optional[int] = None, + center: bool_t = False, + win_type: Optional[str] = None, + on: Optional[str] = None, + axis: Axis = 0, + closed: Optional[str] = None, + ): + axis = self._get_axis_number(axis) - return Rolling( + if win_type is not None: + return Window( self, window=window, min_periods=min_periods, @@ -10634,53 +10469,59 @@ def rolling( closed=closed, ) - cls.rolling = rolling - - @doc(Expanding) - def expanding(self, min_periods=1, 
center=None, axis=0): - axis = self._get_axis_number(axis) - if center is not None: - warnings.warn( - "The `center` argument on `expanding` " - "will be removed in the future", - FutureWarning, - stacklevel=2, - ) - else: - center = False + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + ) - return Expanding(self, min_periods=min_periods, center=center, axis=axis) + @doc(Expanding) + def expanding( + self, min_periods: int = 1, center: Optional[bool_t] = None, axis: Axis = 0 + ) -> Expanding: + axis = self._get_axis_number(axis) + if center is not None: + warnings.warn( + "The `center` argument on `expanding` will be removed in the future", + FutureWarning, + stacklevel=2, + ) + else: + center = False - cls.expanding = expanding + return Expanding(self, min_periods=min_periods, center=center, axis=axis) - @doc(ExponentialMovingWindow) - def ewm( + @doc(ExponentialMovingWindow) + def ewm( + self, + com: Optional[float] = None, + span: Optional[float] = None, + halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, + alpha: Optional[float] = None, + min_periods: int = 0, + adjust: bool_t = True, + ignore_na: bool_t = False, + axis: Axis = 0, + times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, + ) -> ExponentialMovingWindow: + axis = self._get_axis_number(axis) + return ExponentialMovingWindow( self, - com=None, - span=None, - halflife=None, - alpha=None, - min_periods=0, - adjust=True, - ignore_na=False, - axis=0, - times=None, - ): - axis = self._get_axis_number(axis) - return ExponentialMovingWindow( - self, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na, - axis=axis, - times=times, - ) - - cls.ewm = ewm + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + ) 
@doc(klass=_shared_doc_kwargs["klass"], axis="") def transform(self, func, *args, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 555024ad75f5e..a852529e9b517 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5000,7 +5000,6 @@ def to_period(self, freq=None, copy=True) -> "Series": Series._add_numeric_operations() -Series._add_series_or_dataframe_operations() # Add arithmetic! ops.add_flex_arithmetic_methods(Series) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index b81942f062b19..0aaccb47efc44 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -2,117 +2,258 @@ _shared_docs: Dict[str, str] = dict() +_shared_docs[ + "aggregate" +] = """\ +Aggregate using one or more operations over the specified axis. +{versionadded} +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. +{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. + +Returns +------- +scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return scalar, Series or DataFrame. +{see_also} +Notes +----- +`agg` is an alias for `aggregate`. Use the alias. + +A passed user-defined-function will be passed a Series for evaluation. +{examples}""" + +_shared_docs[ + "compare" +] = """\ +Compare to another %(klass)s and show the differences. + +.. versionadded:: 1.1.0 + +Parameters +---------- +other : %(klass)s + Object to compare with. 
+ +align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + +keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + +keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. +""" + +_shared_docs[ + "groupby" +] = """\ +Group %(klass)s using a mapper or by a Series of columns. + +A groupby operation involves some combination of splitting the +object, applying a function, and combining the results. This can be +used to group large amounts of data and compute operations on these +groups. + +Parameters +---------- +by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If an ndarray is passed, the + values are used as-is to determine the groups. A label or list of + labels may be passed to group by the columns in ``self``. Notice + that a tuple is interpreted as a (single) key. +axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). +level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. +as_index : bool, default True + For aggregated output, return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. +sort : bool, default True + Sort group keys. 
Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. +group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. +squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + + .. deprecated:: 1.1.0 + +observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionadded:: 0.23.0 +dropna : bool, default True + If True, and if group keys contain NA values, NA values together + with row/column will be dropped. + If False, NA values will also be treated as the key in groups + + .. versionadded:: 1.1.0 + +Returns +------- +%(klass)sGroupBy + Returns a groupby object that contains information about the groups. + +See Also +-------- +resample : Convenience method for frequency conversion and resampling + of time series. + +Notes +----- +See the `user guide +<https://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more. +""" _shared_docs[ "melt" -] = """ - Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. - - This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. - %(versionadded)s - Parameters - ---------- - id_vars : tuple, list, or ndarray, optional - Column(s) to use as identifier variables. - value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. - var_name : scalar - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. 
- value_name : scalar, default 'value' - Name to use for the 'value' column. - col_level : int or str, optional - If columns are a MultiIndex then use this level to melt. - ignore_index : bool, default True - If True, original index is ignored. If False, the original index is retained. - Index labels will be repeated as necessary. - - .. versionadded:: 1.1.0 - - Returns - ------- - DataFrame - Unpivoted DataFrame. - - See Also - -------- - %(other)s : Identical method. - pivot_table : Create a spreadsheet-style pivot table as a DataFrame. - DataFrame.pivot : Return reshaped DataFrame organized - by given index / column values. - DataFrame.explode : Explode a DataFrame from list-like - columns to long format. - - Examples - -------- - >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - ... 'B': {0: 1, 1: 3, 2: 5}, - ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df - A B C - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)sid_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 3 a C 2 - 4 b C 4 - 5 c C 6 - - The names of 'variable' and 'value' columns can be customized: - - >>> %(caller)sid_vars=['A'], value_vars=['B'], - ... 
var_name='myVarname', value_name='myValname') - A myVarname myValname - 0 a B 1 - 1 b B 3 - 2 c B 5 - - Original index values can be kept around: - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 0 a C 2 - 1 b C 4 - 2 c C 6 - - If you have multi-index columns: - - >>> df.columns = [list('ABC'), list('DEF')] - >>> df - A B C - D E F - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value - 0 a B E 1 - 1 b B E 3 - 2 c B E 5 - """ +] = """\ +Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + +This function is useful to massage a DataFrame into a format where one +or more columns are identifier variables (`id_vars`), while all other +columns, considered measured variables (`value_vars`), are "unpivoted" to +the row axis, leaving just two non-identifier columns, 'variable' and +'value'. +%(versionadded)s +Parameters +---------- +id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. +value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. +var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. +value_name : scalar, default 'value' + Name to use for the 'value' column. +col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. +ignore_index : bool, default True + If True, original index is ignored. If False, the original index is retained. + Index labels will be repeated as necessary. + + .. versionadded:: 1.1.0 + +Returns +------- +DataFrame + Unpivoted DataFrame. + +See Also +-------- +%(other)s : Identical method. 
+pivot_table : Create a spreadsheet-style pivot table as a DataFrame. +DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. +DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + +Examples +-------- +>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, +... 'B': {0: 1, 1: 3, 2: 5}, +... 'C': {0: 2, 1: 4, 2: 6}}) +>>> df + A B C +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +3 a C 2 +4 b C 4 +5 c C 6 + +The names of 'variable' and 'value' columns can be customized: + +>>> %(caller)sid_vars=['A'], value_vars=['B'], +... var_name='myVarname', value_name='myValname') + A myVarname myValname +0 a B 1 +1 b B 3 +2 c B 5 + +Original index values can be kept around: + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +0 a C 2 +1 b C 4 +2 c C 6 + +If you have multi-index columns: + +>>> df.columns = [list('ABC'), list('DEF')] +>>> df + A B C + D E F +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value +0 a B E 1 +1 b B E 3 +2 c B E 5 +""" diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 51a067427e867..2f3058db4493b 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -7,9 +7,9 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.indexes.api import MultiIndex +from pandas.core.shared_docs import _shared_docs _shared_docs = dict(**_shared_docs) _doc_template = """ diff --git a/pandas/core/window/rolling.py 
b/pandas/core/window/rolling.py index baabdf0fca29a..f5e3587ed02d5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -22,7 +22,7 @@ from pandas._libs.tslibs import BaseOffset, to_offset import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import ArrayLike, Axis, FrameOrSeriesUnion, Label +from pandas._typing import ArrayLike, Axis, FrameOrSeries, FrameOrSeriesUnion, Label from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly, doc @@ -159,7 +159,7 @@ class _Window(PandasObject, ShallowMixin, SelectionMixin): def __init__( self, - obj: FrameOrSeriesUnion, + obj: FrameOrSeries, window=None, min_periods: Optional[int] = None, center: bool = False,