diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cde9f9dd43280..43c80cf80d487 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -75,7 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.median RT03,SA01" \ -i "pandas.DataFrame.min RT03" \ -i "pandas.DataFrame.plot PR02,SA01" \ - -i "pandas.DataFrame.sem PR01,RT03,SA01" \ -i "pandas.DataFrame.std PR01,RT03,SA01" \ -i "pandas.DataFrame.sum RT03" \ -i "pandas.DataFrame.swaplevel SA01" \ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 88e4d695b8328..96943eb71c7bd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11945,7 +11945,6 @@ def sem( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") - @doc(make_doc("sem", ndim=2)) def sem( self, axis: Axis | None = 0, @@ -11954,6 +11953,76 @@ def sem( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return unbiased standard error of the mean over requested axis. + + Normalized by N-1 by default. This can be changed using the ddof argument. + + Parameters + ---------- + axis : {index (0), columns (1)} + For `Series` this parameter is unused and defaults to 0. + + .. warning:: + + The behavior of DataFrame.sem with ``axis=None`` is deprecated, + in a future version this will reduce over both axes and return a scalar. + To retain the old behavior, pass axis=0 (or do not pass axis). + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. + **kwargs : + Additional keywords passed. + + Returns + ------- + Series or DataFrame (if level specified) + Unbiased standard error of the mean over requested axis. 
+ + See Also + -------- + DataFrame.var : Return unbiased variance over requested axis. + DataFrame.std : Return sample standard deviation over requested axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.sem().round(6) + 0.57735 + + With a DataFrame + + >>> df = pd.DataFrame({"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"]) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.sem() + a 0.5 + b 0.5 + dtype: float64 + + Using axis=1 + + >>> df.sem(axis=1) + tiger 0.5 + zebra 0.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` + to avoid getting an error. + + >>> df = pd.DataFrame({"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"]) + >>> df.sem(numeric_only=True) + a 0.5 + dtype: float64 + """ result = super().sem( axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs )