diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 567b6853bd633..22b83425b58c2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -292,6 +292,7 @@ Other enhancements - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`) - :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`). - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`). +- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`) - :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example combining a nullable integer column with a numpy integer column will no longer result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`). diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 32cd89383dde9..4f5b7b2d7a888 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,7 +1,9 @@ import importlib +from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union from pandas._config import get_option +from pandas._typing import Label from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.common import is_integer, is_list_like @@ -9,19 +11,23 @@ from pandas.core.base import PandasObject +if TYPE_CHECKING: + from pandas import DataFrame + def hist_series( self, by=None, ax=None, - grid=True, - xlabelsize=None, - xrot=None, - ylabelsize=None, - yrot=None, - figsize=None, - bins=10, - backend=None, + grid: bool = True, + xlabelsize: Optional[int] = None, + xrot: Optional[float] = None, + ylabelsize: Optional[int] = None, + yrot: Optional[float] = None, + figsize: Optional[Tuple[int, int]] = None, + bins: Union[int, Sequence[int]] = 10, + backend: Optional[str] = None, + legend: bool = False, **kwargs, ): """ @@ -58,6 +64,11 @@ def hist_series( .. versionadded:: 1.0.0 + legend : bool, default False + Whether to show the legend. + + ..versionadded:: 1.1.0 + **kwargs To be passed to the actual plotting function. @@ -82,26 +93,28 @@ def hist_series( yrot=yrot, figsize=figsize, bins=bins, + legend=legend, **kwargs, ) def hist_frame( - data, - column=None, + data: "DataFrame", + column: Union[Label, Sequence[Label]] = None, by=None, - grid=True, - xlabelsize=None, - xrot=None, - ylabelsize=None, - yrot=None, + grid: bool = True, + xlabelsize: Optional[int] = None, + xrot: Optional[float] = None, + ylabelsize: Optional[int] = None, + yrot: Optional[float] = None, ax=None, - sharex=False, - sharey=False, - figsize=None, - layout=None, - bins=10, - backend=None, + sharex: bool = False, + sharey: bool = False, + figsize: Optional[Tuple[int, int]] = None, + layout: Optional[Tuple[int, int]] = None, + bins: Union[int, Sequence[int]] = 10, + backend: Optional[str] = None, + legend: bool = False, **kwargs, ): """ @@ -154,6 +167,7 @@ def hist_frame( bin edges are calculated and returned. If bins is a sequence, gives bin edges, including left edge of first bin and right edge of last bin. In this case, bins is returned unmodified. + backend : str, default None Backend to use instead of the backend specified in the option ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to @@ -162,6 +176,11 @@ def hist_frame( .. versionadded:: 1.0.0 + legend : bool, default False + Whether to show the legend. + + ..versionadded:: 1.1.0 + **kwargs All other plotting keyword arguments to be passed to :meth:`matplotlib.pyplot.hist`. @@ -203,6 +222,7 @@ def hist_frame( sharey=sharey, figsize=figsize, layout=layout, + legend=legend, bins=bins, **kwargs, ) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index b0ce43dc2eb36..ee41479b3c7c9 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -225,6 +225,7 @@ def _grouped_hist( xrot=None, ylabelsize=None, yrot=None, + legend=False, **kwargs, ): """ @@ -243,15 +244,26 @@ def _grouped_hist( sharey : bool, default False rot : int, default 90 grid : bool, default True + legend: : bool, default False kwargs : dict, keyword arguments passed to matplotlib.Axes.hist Returns ------- collection of Matplotlib Axes """ + if legend: + assert "label" not in kwargs + if data.ndim == 1: + kwargs["label"] = data.name + elif column is None: + kwargs["label"] = data.columns + else: + kwargs["label"] = column def plot_group(group, ax): ax.hist(group.dropna().values, bins=bins, **kwargs) + if legend: + ax.legend() if xrot is None: xrot = rot @@ -290,10 +302,14 @@ def hist_series( yrot=None, figsize=None, bins=10, + legend: bool = False, **kwds, ): import matplotlib.pyplot as plt + if legend and "label" in kwds: + raise ValueError("Cannot use both legend and label") + if by is None: if kwds.get("layout", None) is not None: raise ValueError("The 'layout' keyword is not supported when 'by' is None") @@ -308,8 +324,11 @@ def hist_series( elif ax.get_figure() != fig: raise AssertionError("passed axis not bound to passed figure") values = self.dropna().values - + if legend: + kwds["label"] = self.name ax.hist(values, bins=bins, **kwds) + if legend: + ax.legend() ax.grid(grid) axes = np.array([ax]) @@ -334,6 +353,7 @@ def hist_series( xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, + legend=legend, **kwds, ) @@ -358,8 +378,11 @@ def hist_frame( figsize=None, layout=None, bins=10, + legend: bool = False, **kwds, ): + if legend and "label" in kwds: + raise ValueError("Cannot use both legend and label") if by is not None: axes = _grouped_hist( data, @@ -376,6 +399,7 @@ def hist_frame( xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, + legend=legend, **kwds, ) return axes @@ -401,11 +425,17 @@ def hist_frame( ) _axes = _flatten(axes) + can_set_label = "label" not in kwds + for i, col in enumerate(data.columns): ax = _axes[i] + if legend and can_set_label: + kwds["label"] = col ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) ax.grid(grid) + if legend: + ax.legend() _set_ticks_props( axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 238639bd3732d..4ac23c2cffa15 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -2,10 +2,11 @@ import numpy as np +import pytest import pandas.util._test_decorators as td -from pandas import DataFrame, Series +from pandas import DataFrame, Index, Series import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase @@ -65,3 +66,49 @@ def test_plot_kwargs(self): res = df.groupby("z").plot.scatter(x="x", y="y") assert len(res["a"].collections) == 1 + + @pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)]) + def test_groupby_hist_frame_with_legend(self, column, expected_axes_num): + # GH 6279 - DataFrameGroupBy histogram can have a legend + expected_layout = (1, expected_axes_num) + expected_labels = column or [["a"], ["b"]] + + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + for axes in g.hist(legend=True, column=column): + self._check_axes_shape( + axes, axes_num=expected_axes_num, layout=expected_layout + ) + for ax, expected_label in zip(axes[0], expected_labels): + self._check_legend_labels(ax, expected_label) + + @pytest.mark.parametrize("column", [None, "b"]) + def test_groupby_hist_frame_with_legend_raises(self, column): + # GH 6279 - DataFrameGroupBy histogram with legend and label raises + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + g.hist(legend=True, column=column, label="d") + + def test_groupby_hist_series_with_legend(self): + # GH 6279 - SeriesGroupBy histogram can have a legend + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + for ax in g["a"].hist(legend=True): + self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) + self._check_legend_labels(ax, ["1", "2"]) + + def test_groupby_hist_series_with_legend_raises(self): + # GH 6279 - SeriesGroupBy histogram with legend and label raises + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + g = df.groupby("c") + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + g.hist(legend=True, label="d") diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 0d3425d001229..b6a6c326c3df3 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -6,7 +6,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Series +from pandas import DataFrame, Index, Series import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase, _check_plot_works @@ -129,6 +129,29 @@ def test_plot_fails_when_ax_differs_from_figure(self): with pytest.raises(AssertionError): self.ts.hist(ax=ax1, figure=fig2) + @pytest.mark.parametrize( + "by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))] + ) + def test_hist_with_legend(self, by, expected_axes_num, expected_layout): + # GH 6279 - Series histogram can have a legend + index = 15 * ["1"] + 15 * ["2"] + s = Series(np.random.randn(30), index=index, name="a") + s.index.name = "b" + + axes = _check_plot_works(s.hist, legend=True, by=by) + self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout) + self._check_legend_labels(axes, "a") + + @pytest.mark.parametrize("by", [None, "b"]) + def test_hist_with_legend_raises(self, by): + # GH 6279 - Series histogram with legend and label raises + index = 15 * ["1"] + 15 * ["2"] + s = Series(np.random.randn(30), index=index, name="a") + s.index.name = "b" + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + s.hist(legend=True, by=by, label="c") + @td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): @@ -293,6 +316,36 @@ def test_hist_column_order_unchanged(self, column, expected): assert result == expected + @pytest.mark.parametrize("by", [None, "c"]) + @pytest.mark.parametrize("column", [None, "b"]) + def test_hist_with_legend(self, by, column): + # GH 6279 - DataFrame histogram can have a legend + expected_axes_num = 1 if by is None and column is not None else 2 + expected_layout = (1, expected_axes_num) + expected_labels = column or ["a", "b"] + if by is not None: + expected_labels = [expected_labels] * 2 + + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + + axes = _check_plot_works(df.hist, legend=True, by=by, column=column) + self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout) + if by is None and column is None: + axes = axes[0] + for expected_label, ax in zip(expected_labels, axes): + self._check_legend_labels(ax, expected_label) + + @pytest.mark.parametrize("by", [None, "c"]) + @pytest.mark.parametrize("column", [None, "b"]) + def test_hist_with_legend_raises(self, by, column): + # GH 6279 - DataFrame histogram with legend and label raises + index = Index(15 * ["1"] + 15 * ["2"], name="c") + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + + with pytest.raises(ValueError, match="Cannot use both legend and label"): + df.hist(legend=True, by=by, column=column, label="d") + @td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase):