diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b30e4177270b8..d67e5c1a3fd40 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -165,6 +165,7 @@ Other enhancements - :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) +- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`) - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 89035552d4309..6d22d2ffe4a51 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -417,11 +417,16 @@ def hist_frame( if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] - data = data._get_numeric_data() + # GH32590 + data = data.select_dtypes( + include=(np.number, "datetime64", "datetimetz"), exclude="timedelta" + ) naxes = len(data.columns) if naxes == 0: - raise ValueError("hist method requires numerical columns, nothing to plot.") + raise ValueError( + "hist method requires numerical or datetime columns, nothing to plot." + ) fig, axes = create_subplots( naxes=naxes, diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 9301a29933d45..2a6bd97c93b8e 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, to_datetime import pandas._testing as tm @@ -28,6 +28,9 @@ def setup_method(self, method): mpl.rcdefaults() + self.start_date_to_int64 = 812419200000000000 + self.end_date_to_int64 = 819331200000000000 + self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3() self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0() self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0() @@ -50,6 +53,14 @@ def setup_method(self, method): "height": random.normal(66, 4, size=n), "weight": random.normal(161, 32, size=n), "category": random.randint(4, size=n), + "datetime": to_datetime( + random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=n, + dtype=np.int64, + ) + ), } ) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 34c881855d16a..d9a58e808661b 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -6,7 +6,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Index, Series +from pandas import DataFrame, Index, Series, to_datetime import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase, _check_plot_works @@ -163,17 +163,34 @@ def test_hist_df_legacy(self): _check_plot_works(self.hist_df.hist) # make sure layout is handled - df = DataFrame(randn(100, 3)) + df = DataFrame(randn(100, 2)) + df[2] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, grid=False) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) assert not axes[1, 1].get_visible() + _check_plot_works(df[[2]].hist) df = DataFrame(randn(100, 1)) _check_plot_works(df.hist) # make sure layout is handled - df = DataFrame(randn(100, 6)) + df = DataFrame(randn(100, 5)) + df[5] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, layout=(4, 2)) self._check_axes_shape(axes, axes_num=6, layout=(4, 2)) @@ -225,18 +242,42 @@ def test_hist_df_legacy(self): ser.hist(foo="bar") @pytest.mark.slow - def test_hist_non_numerical_raises(self): - # gh-10444 - df = DataFrame(np.random.rand(10, 2)) + def test_hist_non_numerical_or_datetime_raises(self): + # gh-10444, GH32590 + df = DataFrame( + { + "a": np.random.rand(10), + "b": np.random.randint(0, 10, 10), + "c": to_datetime( + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ) + ), + "d": to_datetime( + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ), + utc=True, + ), + } + ) df_o = df.astype(object) - msg = "hist method requires numerical columns, nothing to plot." + msg = "hist method requires numerical or datetime columns, nothing to plot." with pytest.raises(ValueError, match=msg): df_o.hist() @pytest.mark.slow def test_hist_layout(self): - df = DataFrame(randn(100, 3)) + df = DataFrame(randn(100, 2)) + df[2] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) layout_to_expected_size = ( {"layout": None, "expected_size": (2, 2)}, # default is 2x2 @@ -268,7 +309,15 @@ def test_hist_layout(self): @pytest.mark.slow # GH 9351 def test_tight_layout(self): - df = DataFrame(randn(100, 3)) + df = DataFrame(np.random.randn(100, 2)) + df[2] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, + ) + ) _check_plot_works(df.hist) self.plt.tight_layout() @@ -355,7 +404,15 @@ def test_grouped_hist_legacy(self): from pandas.plotting._matplotlib.hist import _grouped_hist - df = DataFrame(randn(500, 2), columns=["A", "B"]) + df = DataFrame(randn(500, 1), columns=["A"]) + df["B"] = to_datetime( + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=500, + dtype=np.int64, + ) + ) df["C"] = np.random.randint(0, 4, 500) df["D"] = ["X"] * 500