diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 6d09e10f284af..61ee894f4b126 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -19,20 +19,21 @@ objects contained with a :class:`Index`, :class:`Series`, or For some data types, pandas extends NumPy's type system. String aliases for these types can be found at :ref:`basics.dtypes`. -=================== ========================= ============================= ============================= -Kind of Data pandas Data Type Scalar Array -=================== ========================= ============================= ============================= -TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` -Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` -Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` -Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` -Nullable Integer :class:`Int64Dtype`, ... 
(none) :ref:`api.arrays.integer_na` -Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` -Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` -Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` -Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` -PyArrow :class:`ArrowDtype` Python Scalars or :class:`NA` :ref:`api.arrays.arrow` -=================== ========================= ============================= ============================= +=================== ========================== ============================= ============================= +Kind of Data pandas Data Type Scalar Array +=================== ========================== ============================= ============================= +TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` +Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` +Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` +Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` +Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.arrays.integer_na` +Nullable Float :class:`Float64Dtype`, ... (none) :ref:`api.arrays.float_na` +Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` +Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` +Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` +Nullable Boolean :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` +PyArrow :class:`ArrowDtype` Python Scalars or :class:`NA` :ref:`api.arrays.arrow` +=================== ========================== ============================= ============================= pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). 
The top-level :meth:`array` method can be used to create a new array, which may be @@ -91,13 +92,20 @@ with the :class:`arrays.DatetimeArray` extension array, which can hold timezone- or timezone-aware values. :class:`Timestamp`, a subclass of :class:`datetime.datetime`, is pandas' -scalar type for timezone-naive or timezone-aware datetime data. +scalar type for timezone-naive or timezone-aware datetime data. :class:`NaT` +is the missing value for datetime data. .. autosummary:: :toctree: api/ Timestamp +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + NaT + Properties ~~~~~~~~~~ .. autosummary:: @@ -208,13 +216,20 @@ Timedeltas ---------- NumPy can natively represent timedeltas. pandas provides :class:`Timedelta` -for symmetry with :class:`Timestamp`. +for symmetry with :class:`Timestamp`. :class:`NaT` +is the missing value for timedelta data. .. autosummary:: :toctree: api/ Timedelta +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + NaT + Properties ~~~~~~~~~~ .. autosummary:: @@ -419,6 +434,26 @@ pandas provides this through :class:`arrays.IntegerArray`. UInt16Dtype UInt32Dtype UInt64Dtype + NA + +.. _api.arrays.float_na: + +Nullable float +-------------- + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.FloatingArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Float32Dtype + Float64Dtype + NA .. _api.arrays.categorical: @@ -555,6 +590,7 @@ with a bool :class:`numpy.ndarray`. :template: autosummary/class_without_autosummary.rst BooleanDtype + NA .. 
Dtype attributes which are manually listed in their docstrings: including diff --git a/doc/source/reference/general_functions.rst b/doc/source/reference/general_functions.rst index 474e37a85d857..02b0bf5d13dde 100644 --- a/doc/source/reference/general_functions.rst +++ b/doc/source/reference/general_functions.rst @@ -26,6 +26,7 @@ Data manipulations from_dummies factorize unique + lreshape wide_to_long Top-level missing data diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 51bd659081b8f..63d7c19b7841d 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -27,6 +27,13 @@ Indexing, iteration Grouper +Function application helper +--------------------------- +.. autosummary:: + :toctree: api/ + + NamedAgg + .. currentmodule:: pandas.core.groupby Function application diff --git a/doc/source/reference/options.rst b/doc/source/reference/options.rst index 7316b6e9c72b1..b91ea3ffb62f5 100644 --- a/doc/source/reference/options.rst +++ b/doc/source/reference/options.rst @@ -19,3 +19,10 @@ Working with options get_option set_option option_context + +Numeric formatting +------------------ +.. autosummary:: + :toctree: api/ + + set_eng_float_format diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7fe1d55ba55be..bd26fe40a69b8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -108,6 +108,31 @@ class NamedAgg(NamedTuple): + """ + Helper for column-specific aggregation with control over output column names. + + Subclass of typing.NamedTuple. + + Parameters + ---------- + column : Hashable + Column label in the DataFrame to apply aggfunc to. + aggfunc : function or str + Function to apply to the provided column. If string, the name of a built-in + pandas function. 
+ + Examples + -------- + >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) + >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") + >>> agg_1 = pd.NamedAgg(column=1, aggfunc=np.mean) + >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) + result_a result_1 + key + 1 -1 10.5 + 2 1 12.0 + """ + column: Hashable aggfunc: AggScalar diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 912c088d8b520..6d8e821ffb5d8 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -2116,11 +2116,56 @@ def __call__(self, num: float) -> str: def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None: """ - Alter default behavior on how float is formatted in DataFrame. - Format float in engineering format. By accuracy, we mean the number of - decimal digits after the floating point. + Format float representation in DataFrame with SI notation. - See also EngFormatter. + Parameters + ---------- + accuracy : int, default 3 + Number of decimal digits after the decimal point. + use_eng_prefix : bool, default False + Whether to represent a value with SI prefixes. + + Returns + ------- + None + + Examples + -------- + >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6]) + >>> df + 0 + 0 1.000000e-09 + 1 1.000000e-03 + 2 1.000000e+00 + 3 1.000000e+03 + 4 1.000000e+06 + + >>> pd.set_eng_float_format(accuracy=1) + >>> df + 0 + 0 1.0E-09 + 1 1.0E-03 + 2 1.0E+00 + 3 1.0E+03 + 4 1.0E+06 + + >>> pd.set_eng_float_format(use_eng_prefix=True) + >>> df + 0 + 0 1.000n + 1 1.000m + 2 1.000 + 3 1.000k + 4 1.000M + + >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True) + >>> df + 0 + 0 1.0n + 1 1.0m + 2 1.0 + 3 1.0k + 4 1.0M """ set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))