From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001
From: Kaiqi Dong <kaiqi@kth.se>
Date: Mon, 3 Dec 2018 17:43:52 +0100
Subject: [PATCH 001/142] remove \n from docstring

---
 pandas/core/arrays/datetimes.py  | 26 +++++++++++++-------------
 pandas/core/arrays/timedeltas.py | 16 ++++++++--------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index cfe3afcf3730a..b3df505d56d78 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -82,7 +82,7 @@ def f(self):
         return result
 
     f.__name__ = name
-    f.__doc__ = docstring
+    f.__doc__ = "\n{}\n".format(docstring)
     return property(f)
 
 
@@ -1072,19 +1072,19 @@ def date(self):
 
         return tslib.ints_to_pydatetime(timestamps, box="date")
 
-    year = _field_accessor('year', 'Y', "\n The year of the datetime\n")
+    year = _field_accessor('year', 'Y', "The year of the datetime")
     month = _field_accessor('month', 'M',
-                            "\n The month as January=1, December=12 \n")
-    day = _field_accessor('day', 'D', "\nThe days of the datetime\n")
-    hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n")
-    minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n")
-    second = _field_accessor('second', 's', "\nThe seconds of the datetime\n")
+                            "The month as January=1, December=12")
+    day = _field_accessor('day', 'D', "The days of the datetime")
+    hour = _field_accessor('hour', 'h', "The hours of the datetime")
+    minute = _field_accessor('minute', 'm', "The minutes of the datetime")
+    second = _field_accessor('second', 's', "The seconds of the datetime")
     microsecond = _field_accessor('microsecond', 'us',
-                                  "\nThe microseconds of the datetime\n")
+                                  "The microseconds of the datetime")
     nanosecond = _field_accessor('nanosecond', 'ns',
-                                 "\nThe nanoseconds of the datetime\n")
+                                 "The nanoseconds of the datetime")
     weekofyear = _field_accessor('weekofyear', 'woy',
-                                 "\nThe week ordinal of the year\n")
+                                 "The week ordinal of the year")
     week = weekofyear
     _dayofweek_doc = """
     The day of the week with Monday=0, Sunday=6.
@@ -1129,12 +1129,12 @@ def date(self):
         "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0")
 
     dayofyear = _field_accessor('dayofyear', 'doy',
-                                "\nThe ordinal day of the year\n")
-    quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n")
+                                "The ordinal day of the year")
+    quarter = _field_accessor('quarter', 'q', "The quarter of the date")
     days_in_month = _field_accessor(
         'days_in_month',
         'dim',
-        "\nThe number of days in the month\n")
+        "The number of days in the month")
     daysinmonth = days_in_month
     _is_month_doc = """
         Indicates whether the date is the {first_or_last} day of the month.
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 830283d31a929..4afc9f5483c2a 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -59,7 +59,7 @@ def f(self):
         return result
 
     f.__name__ = name
-    f.__doc__ = docstring
+    f.__doc__ = "\n{}\n".format(docstring)
     return property(f)
 
 
@@ -684,16 +684,16 @@ def to_pytimedelta(self):
         return tslibs.ints_to_pytimedelta(self.asi8)
 
     days = _field_accessor("days", "days",
-                           "\nNumber of days for each element.\n")
+                           "Number of days for each element.")
     seconds = _field_accessor("seconds", "seconds",
-                              "\nNumber of seconds (>= 0 and less than 1 day) "
-                              "for each element.\n")
+                              "Number of seconds (>= 0 and less than 1 day) "
+                              "for each element.")
     microseconds = _field_accessor("microseconds", "microseconds",
-                                   "\nNumber of microseconds (>= 0 and less "
-                                   "than 1 second) for each element.\n")
+                                   "Number of microseconds (>= 0 and less "
+                                   "than 1 second) for each element.")
     nanoseconds = _field_accessor("nanoseconds", "nanoseconds",
-                                  "\nNumber of nanoseconds (>= 0 and less "
-                                  "than 1 microsecond) for each element.\n")
+                                  "Number of nanoseconds (>= 0 and less "
+                                  "than 1 microsecond) for each element.")
 
     @property
     def components(self):

From b2f45a61958c22d11e03de621a09c47169a07d03 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 18:38:58 +0200
Subject: [PATCH 002/142] fix by in hist

---
 pandas/plotting/_core.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 837b01974be93..82809f9d9ebef 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -679,6 +679,8 @@ def _get_call_args(backend_name, data, args, kwargs):
                 ("xerr", None),
                 ("secondary_y", False),
                 ("sort_columns", False),
+                ("by", None),
+                ("column", None),
             ]
         else:
             raise TypeError(
@@ -790,6 +792,12 @@ def __call__(self, *args, **kwargs):
                         )
                     label_name = label_kw or data.columns
                     data.columns = label_name
+            if kwargs.get("by") is not None:
+                grouped = data.groupby(kwargs.get("by"))
+                if kwargs.get("column") is not None:
+                    grouped = grouped[kwargs.get("column")]
+
+                data = grouped
 
         return plot_backend.plot(data, kind=kind, **kwargs)
 

From 8b6e00a59268b2e3977d0106f3815fd4b08612e5 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 20:28:07 +0200
Subject: [PATCH 003/142] make plot work

---
 pandas/plotting/_core.py            |  9 +++--
 pandas/plotting/_matplotlib/hist.py | 63 +++++++++++++++++++----------
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 82809f9d9ebef..e45c3e511f25d 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -793,12 +793,15 @@ def __call__(self, *args, **kwargs):
                     label_name = label_kw or data.columns
                     data.columns = label_name
             if kwargs.get("by") is not None:
+                import pandas as pd
                 grouped = data.groupby(kwargs.get("by"))
                 if kwargs.get("column") is not None:
                     grouped = grouped[kwargs.get("column")]
-
-                data = grouped
-
+                d = {}
+                for key, group in grouped:
+                    d[key] = group
+                data = pd.DataFrame(d)
+                kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 
     def line(self, x=None, y=None, **kwargs):
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 5213e09f14067..1defa6116bbcc 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -20,22 +20,24 @@ class HistPlot(LinePlot):
     def __init__(self, data, bins=10, bottom=0, **kwargs):
         self.bins = bins  # use mpl default
         self.bottom = bottom
+        self.by = kwargs["by"]
         # Do not call LinePlot.__init__ which may fill nan
         MPLPlot.__init__(self, data, **kwargs)
 
     def _args_adjust(self):
-        if is_integer(self.bins):
-            # create common bin edge
-            values = self.data._convert(datetime=True)._get_numeric_data()
-            values = np.ravel(values)
-            values = values[~isna(values)]
-
-            hist, self.bins = np.histogram(
-                values,
-                bins=self.bins,
-                range=self.kwds.get("range", None),
-                weights=self.kwds.get("weights", None),
-            )
+        if self.by is None:
+            if is_integer(self.bins):
+                # create common bin edge
+                values = self.data._convert(datetime=True)._get_numeric_data()
+                values = np.ravel(values)
+                values = values[~isna(values)]
+
+                hist, self.bins = np.histogram(
+                    values,
+                    bins=self.bins,
+                    range=self.kwds.get("range", None),
+                    weights=self.kwds.get("weights", None),
+                )
 
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
@@ -67,21 +69,38 @@ def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
 
-        for i, (label, y) in enumerate(self._iter_data()):
-            ax = self._get_ax(i)
+        if self.by is None:
+            for i, (label, y) in enumerate(self._iter_data()):
+                ax = self._get_ax(i)
+
+                kwds = self.kwds.copy()
+                label = pprint_thing(label)
+                kwds["label"] = label
+
+                style, kwds = self._apply_style_colors(colors, kwds, i, label)
+                if style is not None:
+                    kwds["style"] = style
 
+                kwds = self._make_plot_keywords(kwds, y)
+                artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
+                self._add_legend_handle(artists[0], label, index=i)
+
+        else:
             kwds = self.kwds.copy()
+            kwds = self._make_plot_keywords(kwds, None)
+            naxes = len(list(self._iter_data()))
 
-            label = pprint_thing(label)
-            kwds["label"] = label
+            fig, axes = _subplots(naxes=naxes)
+            _axes = _flatten(axes)
+            for i, (label, y) in enumerate(self._iter_data()):
+                ax = _axes[i]
 
-            style, kwds = self._apply_style_colors(colors, kwds, i, label)
-            if style is not None:
-                kwds["style"] = style
+                ax.hist(y, **kwds)
+                ax.set_title(pprint_thing(label))
 
-            kwds = self._make_plot_keywords(kwds, y)
-            artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
-            self._add_legend_handle(artists[0], label, index=i)
+            fig.subplots_adjust(
+                bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3
+            )
 
     def _make_plot_keywords(self, kwds, y):
         """merge BoxPlot/KdePlot properties to passed kwds"""

From dc0c2ec9efe31a9963deb81b6588aef036e1224c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:05:23 +0200
Subject: [PATCH 004/142] add _group_plot function

---
 pandas/plotting/_matplotlib/hist.py | 38 +++++++++++++++++++----------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 1defa6116bbcc..18a9398c6d365 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -65,6 +65,27 @@ def _plot(
         cls._update_stacker(ax, stacking_id, n)
         return patches
 
+    @classmethod
+    def _group_plot(cls, ax, data, naxes, rot=90, xrot=None, **kwds):
+        converter._WARN = False  # no warning for pandas plots
+        xrot = xrot or rot
+        fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False)
+        _axes = _flatten(axes)
+
+        for i, (label, y) in enumerate(data):
+            ax = _axes[i]
+
+            ax.hist(y, **kwds)
+            ax.set_title(pprint_thing(label))
+
+        _set_ticks_props(
+            axes, xrot=xrot
+        )
+
+        fig.subplots_adjust(
+            bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3
+        )
+
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
@@ -86,21 +107,12 @@ def _make_plot(self):
                 self._add_legend_handle(artists[0], label, index=i)
 
         else:
+            naxes = len(list(self._iter_data()))
+            data = self._iter_data()
             kwds = self.kwds.copy()
             kwds = self._make_plot_keywords(kwds, None)
-            naxes = len(list(self._iter_data()))
-
-            fig, axes = _subplots(naxes=naxes)
-            _axes = _flatten(axes)
-            for i, (label, y) in enumerate(self._iter_data()):
-                ax = _axes[i]
-
-                ax.hist(y, **kwds)
-                ax.set_title(pprint_thing(label))
-
-            fig.subplots_adjust(
-                bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3
-            )
+            ax = self._get_ax(0)
+            self._group_plot(ax, data, naxes, **kwds)
 
     def _make_plot_keywords(self, kwds, y):
         """merge BoxPlot/KdePlot properties to passed kwds"""

From d8039389eeb21423b0731e79a665dd78fd3f690c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:10:46 +0200
Subject: [PATCH 005/142] check function

---
 pandas/plotting/_matplotlib/hist.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 18a9398c6d365..2d08d765efbd1 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -78,9 +78,7 @@ def _group_plot(cls, ax, data, naxes, rot=90, xrot=None, **kwds):
             ax.hist(y, **kwds)
             ax.set_title(pprint_thing(label))
 
-        _set_ticks_props(
-            axes, xrot=xrot
-        )
+        _set_ticks_props(axes, xrot=xrot)
 
         fig.subplots_adjust(
             bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3

From 33dd762f0c4b49ae3c3999c630141a105e567a9e Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:18:21 +0200
Subject: [PATCH 006/142] reformat

---
 pandas/plotting/_core.py            | 1 +
 pandas/plotting/_matplotlib/hist.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index e45c3e511f25d..1a3999e278a12 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -794,6 +794,7 @@ def __call__(self, *args, **kwargs):
                     data.columns = label_name
             if kwargs.get("by") is not None:
                 import pandas as pd
+
                 grouped = data.groupby(kwargs.get("by"))
                 if kwargs.get("column") is not None:
                     grouped = grouped[kwargs.get("column")]
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 2d08d765efbd1..0f790bce663fe 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -101,7 +101,9 @@ def _make_plot(self):
                     kwds["style"] = style
 
                 kwds = self._make_plot_keywords(kwds, y)
-                artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
+                artists = self._plot(
+                    ax, y, column_num=i, stacking_id=stacking_id, **kwds
+                )
                 self._add_legend_handle(artists[0], label, index=i)
 
         else:

From d59d64284036cb0e6f41fa2a73550875180f5fb4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:19:45 +0200
Subject: [PATCH 007/142] put import up

---
 pandas/plotting/_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 1a3999e278a12..849d049336235 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -6,6 +6,7 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import Appender
 
+from pandas import DataFrame
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
@@ -793,7 +794,6 @@ def __call__(self, *args, **kwargs):
                     label_name = label_kw or data.columns
                     data.columns = label_name
             if kwargs.get("by") is not None:
-                import pandas as pd
 
                 grouped = data.groupby(kwargs.get("by"))
                 if kwargs.get("column") is not None:
@@ -801,7 +801,7 @@ def __call__(self, *args, **kwargs):
                 d = {}
                 for key, group in grouped:
                     d[key] = group
-                data = pd.DataFrame(d)
+                data = DataFrame(d)
                 kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 

From 66eb06c487c5fd7dae85876cc3bbd4732d62b9e1 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:21:57 +0200
Subject: [PATCH 008/142] add comments

---
 pandas/plotting/_core.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 849d049336235..9cb9f24fdf82b 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -793,15 +793,19 @@ def __call__(self, *args, **kwargs):
                         )
                     label_name = label_kw or data.columns
                     data.columns = label_name
+
+            # process groupby if by argument is defined
             if kwargs.get("by") is not None:
 
                 grouped = data.groupby(kwargs.get("by"))
                 if kwargs.get("column") is not None:
                     grouped = grouped[kwargs.get("column")]
-                d = {}
+
+                # recreate data according to groupby object
+                data_dict = {}
                 for key, group in grouped:
-                    d[key] = group
-                data = DataFrame(d)
+                    data_dict[key] = group
+                data = DataFrame(data_dict)
                 kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 

From ea267adbbef68507f99bbca60fc68bd19cb5949f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:32:22 +0200
Subject: [PATCH 009/142] Mimic group plot

---
 pandas/plotting/_matplotlib/hist.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 0f790bce663fe..b48b3da80908b 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -66,10 +66,21 @@ def _plot(
         return patches
 
     @classmethod
-    def _group_plot(cls, ax, data, naxes, rot=90, xrot=None, **kwds):
+    def _group_plot(
+        cls, ax, data, naxes, rot=90, xrot=None, sharex=False, sharey=False, **kwds
+    ):
+        if "figure" in kwds:
+            raise ValueError(
+                "Cannot pass 'figure' when using the "
+                "'by' argument, since a new 'Figure' instance "
+                "will be created"
+            )
+
         converter._WARN = False  # no warning for pandas plots
         xrot = xrot or rot
-        fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False)
+        fig, axes = _subplots(
+            naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey
+        )
         _axes = _flatten(axes)
 
         for i, (label, y) in enumerate(data):

From 809522447681ab29048184109d36e439d45fd4c5 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:50:41 +0200
Subject: [PATCH 010/142] fix import failure

---
 pandas/plotting/_core.py            | 4 ++--
 pandas/plotting/_matplotlib/hist.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 9cb9f24fdf82b..b26254eda9a26 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -6,7 +6,7 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import Appender
 
-from pandas import DataFrame
+import pandas as pd
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
@@ -805,7 +805,7 @@ def __call__(self, *args, **kwargs):
                 data_dict = {}
                 for key, group in grouped:
                     data_dict[key] = group
-                data = DataFrame(data_dict)
+                data = pd.DataFrame(data_dict)
                 kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index b48b3da80908b..91fd09e4a19cd 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -12,7 +12,7 @@
 from pandas.plotting._matplotlib import converter
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
-
+import matplotlib.pyplot as plt
 
 class HistPlot(LinePlot):
     _kind = "hist"
@@ -67,7 +67,7 @@ def _plot(
 
     @classmethod
     def _group_plot(
-        cls, ax, data, naxes, rot=90, xrot=None, sharex=False, sharey=False, **kwds
+        cls, ax, data, naxes, rot=90, xrot=None, sharex=False, sharey=False, layout=None, **kwds
     ):
         if "figure" in kwds:
             raise ValueError(
@@ -79,7 +79,7 @@ def _group_plot(
         converter._WARN = False  # no warning for pandas plots
         xrot = xrot or rot
         fig, axes = _subplots(
-            naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey
+            naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, layout=layout
         )
         _axes = _flatten(axes)
 

From 31decc1056e66d7150cc9e9c2fc3cdf7c745b399 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 21:55:31 +0200
Subject: [PATCH 011/142] reformat

---
 pandas/plotting/_matplotlib/hist.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 91fd09e4a19cd..1d123315e59ea 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -12,7 +12,7 @@
 from pandas.plotting._matplotlib import converter
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
-import matplotlib.pyplot as plt
+
 
 class HistPlot(LinePlot):
     _kind = "hist"
@@ -67,7 +67,16 @@ def _plot(
 
     @classmethod
     def _group_plot(
-        cls, ax, data, naxes, rot=90, xrot=None, sharex=False, sharey=False, layout=None, **kwds
+        cls,
+        ax,
+        data,
+        naxes,
+        rot=90,
+        xrot=None,
+        sharex=False,
+        sharey=False,
+        layout=None,
+        **kwds
     ):
         if "figure" in kwds:
             raise ValueError(
@@ -79,7 +88,12 @@ def _group_plot(
         converter._WARN = False  # no warning for pandas plots
         xrot = xrot or rot
         fig, axes = _subplots(
-            naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, layout=layout
+            naxes=naxes,
+            ax=ax,
+            squeeze=False,
+            sharex=sharex,
+            sharey=sharey,
+            layout=layout,
         )
         _axes = _flatten(axes)
 

From e4bdbd0df564817887cf041c811ece5c9a3f4109 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 22:27:14 +0200
Subject: [PATCH 012/142] fix test

---
 pandas/plotting/_core.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index b26254eda9a26..07b93f442047c 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -680,8 +680,6 @@ def _get_call_args(backend_name, data, args, kwargs):
                 ("xerr", None),
                 ("secondary_y", False),
                 ("sort_columns", False),
-                ("by", None),
-                ("column", None),
             ]
         else:
             raise TypeError(

From 4033159d67a1fbc309c6b6903ed0dee767049dc2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 22:45:57 +0200
Subject: [PATCH 013/142] hacky fix

---
 pandas/plotting/_matplotlib/hist.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 1d123315e59ea..b890a6aea2ed9 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -1,5 +1,6 @@
 import warnings
 
+import matplotlib.pyplot as plt
 import numpy as np
 
 from pandas.core.dtypes.common import is_integer, is_list_like
@@ -76,6 +77,9 @@ def _group_plot(
         sharex=False,
         sharey=False,
         layout=None,
+        xlabelsize=None,
+        ylabelsize=None,
+        yrot=None,
         **kwds
     ):
         if "figure" in kwds:
@@ -103,11 +107,14 @@ def _group_plot(
             ax.hist(y, **kwds)
             ax.set_title(pprint_thing(label))
 
-        _set_ticks_props(axes, xrot=xrot)
+        _set_ticks_props(
+            axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
+        )
 
         fig.subplots_adjust(
             bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3
         )
+        plt.show()
 
     def _make_plot(self):
         colors = self._get_colors()

From 57a3bdf23ecd845c61c2a179e69362bfa7c41751 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 10 Sep 2019 22:46:48 +0200
Subject: [PATCH 014/142] fix isort

---
 pandas/plotting/_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 07b93f442047c..0c1e25a8cfe82 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -6,10 +6,10 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import Appender
 
-import pandas as pd
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
+import pandas as pd
 from pandas.core.base import PandasObject
 
 # Trigger matplotlib import, which implicitly registers our

From 80602233d6a5a42bfe63991ff78865a4d09fa5f2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 11 Sep 2019 11:26:34 +0200
Subject: [PATCH 015/142] fix tests

---
 pandas/plotting/_core.py            |  4 ++--
 pandas/plotting/_matplotlib/hist.py | 10 ++++------
 pandas/tests/plotting/test_frame.py |  7 +++++++
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 0c1e25a8cfe82..343c44a424089 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -9,7 +9,7 @@
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
-import pandas as pd
+from pandas import DataFrame
 from pandas.core.base import PandasObject
 
 # Trigger matplotlib import, which implicitly registers our
@@ -803,7 +803,7 @@ def __call__(self, *args, **kwargs):
                 data_dict = {}
                 for key, group in grouped:
                     data_dict[key] = group
-                data = pd.DataFrame(data_dict)
+                data = DataFrame(data_dict)
                 kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index b890a6aea2ed9..c6103c2e891cb 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -21,7 +21,7 @@ class HistPlot(LinePlot):
     def __init__(self, data, bins=10, bottom=0, **kwargs):
         self.bins = bins  # use mpl default
         self.bottom = bottom
-        self.by = kwargs["by"]
+        self.by = kwargs.get("by")
         # Do not call LinePlot.__init__ which may fill nan
         MPLPlot.__init__(self, data, **kwargs)
 
@@ -99,11 +99,10 @@ def _group_plot(
             sharey=sharey,
             layout=layout,
         )
-        _axes = _flatten(axes)
 
+        _axes = _flatten(axes)
         for i, (label, y) in enumerate(data):
             ax = _axes[i]
-
             ax.hist(y, **kwds)
             ax.set_title(pprint_thing(label))
 
@@ -114,7 +113,7 @@ def _group_plot(
         fig.subplots_adjust(
             bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3
         )
-        plt.show()
+        return axes
 
     def _make_plot(self):
         colors = self._get_colors()
@@ -143,8 +142,7 @@ def _make_plot(self):
             data = self._iter_data()
             kwds = self.kwds.copy()
             kwds = self._make_plot_keywords(kwds, None)
-            ax = self._get_ax(0)
-            self._group_plot(ax, data, naxes, **kwds)
+            self._group_plot(self._get_ax(0), data, naxes, **kwds)
 
     def _make_plot_keywords(self, kwds, y):
         """merge BoxPlot/KdePlot properties to passed kwds"""
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index f672cd3a6aa58..7390a4497ee2a 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3229,6 +3229,13 @@ def test_subplots_sharex_false(self):
         tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1)
         tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
 
+    def test_hist_plot_by_argument(self):
+        # GH 15079
+        df = DataFrame(np.random.randn(30, 2), columns=['A', 'B'])
+        df["C"] = np.random.choice(["a", "b", "c"], 30)
+
+        _check_plot_works(df.plot.hist, column='A', by='C')
+
 
 def _generate_4_axes_via_gridspec():
     import matplotlib.pyplot as plt

From d66633494638536c39156fc4460d32afc7a5f976 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 11 Sep 2019 11:42:43 +0200
Subject: [PATCH 016/142] fix import failure

---
 pandas/plotting/_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 343c44a424089..5d8f28842845f 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -9,7 +9,7 @@
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
-from pandas import DataFrame
+from pandas.core.frame import DataFrame
 from pandas.core.base import PandasObject
 
 # Trigger matplotlib import, which implicitly registers our

From 3216d5984a5bf42ab5490dc3ce23d7ad627b5a30 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 11 Sep 2019 13:15:17 +0200
Subject: [PATCH 017/142] fix import error

---
 pandas/plotting/_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 5d8f28842845f..a35f23ef7f7e0 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1,6 +1,7 @@
 import importlib
 import warnings
 
+import pandas as pd
 from pandas._config import get_option
 
 from pandas.compat._optional import import_optional_dependency
@@ -9,7 +10,6 @@
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
-from pandas.core.frame import DataFrame
 from pandas.core.base import PandasObject
 
 # Trigger matplotlib import, which implicitly registers our
@@ -803,7 +803,7 @@ def __call__(self, *args, **kwargs):
                 data_dict = {}
                 for key, group in grouped:
                     data_dict[key] = group
-                data = DataFrame(data_dict)
+                data = pd.DataFrame(data_dict)
                 kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 

From 45f4b7fa2cc9b937a7f1548b04ead5b360e43006 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 11 Sep 2019 14:50:06 +0200
Subject: [PATCH 018/142] Update imports

---
 pandas/plotting/_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index a35f23ef7f7e0..b45899e22f712 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1,7 +1,6 @@
 import importlib
 import warnings
 
-import pandas as pd
 from pandas._config import get_option
 
 from pandas.compat._optional import import_optional_dependency
@@ -11,6 +10,7 @@
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
 from pandas.core.base import PandasObject
+from pandas.core.frame import DataFrame
 
 # Trigger matplotlib import, which implicitly registers our
 # converts. Implicit registration is deprecated, and when enforced
@@ -803,7 +803,7 @@ def __call__(self, *args, **kwargs):
                 data_dict = {}
                 for key, group in grouped:
                     data_dict[key] = group
-                data = pd.DataFrame(data_dict)
+                data = DataFrame(data_dict)
                 kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 

From 2b0785b3282072c891c932a1637fd069ba4e1e39 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 11 Sep 2019 16:09:35 +0200
Subject: [PATCH 019/142] test imports

---
 pandas/plotting/_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index b45899e22f712..da9894a5eb5d1 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -10,7 +10,6 @@
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
 from pandas.core.base import PandasObject
-from pandas.core.frame import DataFrame
 
 # Trigger matplotlib import, which implicitly registers our
 # converts. Implicit registration is deprecated, and when enforced
@@ -794,6 +793,7 @@ def __call__(self, *args, **kwargs):
 
             # process groupby if by argument is defined
             if kwargs.get("by") is not None:
+                from pandas.core.frame import DataFrame
 
                 grouped = data.groupby(kwargs.get("by"))
                 if kwargs.get("column") is not None:

From d79dba3e170badf8cc4d8da57be33a81db18dfe3 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 12 Sep 2019 20:10:08 +0200
Subject: [PATCH 020/142] Move by/column grouping from _core.py into matplotlib backend

---
 pandas/plotting/_core.py            | 15 ---------------
 pandas/plotting/_matplotlib/core.py | 16 ++++++++++++++++
 pandas/plotting/_matplotlib/hist.py |  4 +++-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index da9894a5eb5d1..7ec7a2b596abb 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -790,21 +790,6 @@ def __call__(self, *args, **kwargs):
                         )
                     label_name = label_kw or data.columns
                     data.columns = label_name
-
-            # process groupby if by argument is defined
-            if kwargs.get("by") is not None:
-                from pandas.core.frame import DataFrame
-
-                grouped = data.groupby(kwargs.get("by"))
-                if kwargs.get("column") is not None:
-                    grouped = grouped[kwargs.get("column")]
-
-                # recreate data according to groupby object
-                data_dict = {}
-                for key, group in grouped:
-                    data_dict[key] = group
-                data = DataFrame(data_dict)
-                kwargs.pop("column")
         return plot_backend.plot(data, kind=kind, **kwargs)
 
     def line(self, x=None, y=None, **kwargs):
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 346949cb82c4d..02f6069bd63d6 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -25,6 +25,7 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
+from pandas.core.frame import DataFrame
 import pandas.core.common as com
 
 from pandas.io.formats.printing import pprint_thing
@@ -107,6 +108,7 @@ def __init__(
         table=False,
         layout=None,
         include_bool=False,
+        column=None,
         **kwds
     ):
 
@@ -115,6 +117,7 @@ def __init__(
         converter._WARN = False  # no warning for pandas plots
         self.data = data
         self.by = by
+        self.column = column
 
         self.kind = kind
 
@@ -399,6 +402,19 @@ def _compute_plot_data(self):
                 label = "None"
             data = data.to_frame(name=label)
 
+        # GH15079 restructure data if by is defined
+        if self.by is not None:
+            grouped = data.groupby(self.by)
+
+            if self.column is not None:
+                grouped = grouped[self.column]
+
+            # recreate data according to groupby object
+            data_dict = {}
+            for key, group in grouped:
+                data_dict[key] = group
+            data = DataFrame(data_dict)
+
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
         data = data._convert(datetime=True, timedelta=True)
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index c6103c2e891cb..82246065bd3c3 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -1,6 +1,5 @@
 import warnings
 
-import matplotlib.pyplot as plt
 import numpy as np
 
 from pandas.core.dtypes.common import is_integer, is_list_like
@@ -13,6 +12,7 @@
 from pandas.plotting._matplotlib import converter
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
+from pandas.core.frame import DataFrame
 
 
 class HistPlot(LinePlot):
@@ -22,6 +22,8 @@ def __init__(self, data, bins=10, bottom=0, **kwargs):
         self.bins = bins  # use mpl default
         self.bottom = bottom
         self.by = kwargs.get("by")
+        self.column = kwargs.get("column")
+
         # Do not call LinePlot.__init__ which may fill nan
         MPLPlot.__init__(self, data, **kwargs)
 

From 321fbd24a7566a4a29af5b46d0255c0d8898a883 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 12 Sep 2019 20:12:21 +0200
Subject: [PATCH 021/142] restore removed line

---
 pandas/plotting/_core.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 7ec7a2b596abb..837b01974be93 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -790,6 +790,7 @@ def __call__(self, *args, **kwargs):
                         )
                     label_name = label_kw or data.columns
                     data.columns = label_name
+
         return plot_backend.plot(data, kind=kind, **kwargs)
 
     def line(self, x=None, y=None, **kwargs):

From a7b9ae556cdedb6ce506b19ccba4421e5578bbbc Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 12 Sep 2019 20:14:13 +0200
Subject: [PATCH 022/142] Remove unused line

---
 pandas/plotting/_matplotlib/hist.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 82246065bd3c3..a016e3f9efe71 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -22,7 +22,6 @@ def __init__(self, data, bins=10, bottom=0, **kwargs):
         self.bins = bins  # use mpl default
         self.bottom = bottom
         self.by = kwargs.get("by")
-        self.column = kwargs.get("column")
 
         # Do not call LinePlot.__init__ which may fill nan
         MPLPlot.__init__(self, data, **kwargs)

From d2d13fd8fb25dd80b527a7ad80babbd552512273 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 12 Sep 2019 23:04:29 +0200
Subject: [PATCH 023/142] Support multi-column by plots via MultiIndex columns

---
 pandas/plotting/_matplotlib/core.py | 51 +++++++++++++++++++++--------
 pandas/plotting/_matplotlib/hist.py | 28 +++++-----------
 2 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index c697cbe7ddaae..d3d5f307e7b01 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -25,8 +25,10 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas.core.frame import DataFrame
+from pandas import concat
 import pandas.core.common as com
+from pandas.core.frame import DataFrame
+from pandas.core.index import MultiIndex
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib import converter
@@ -126,7 +128,7 @@ def __init__(
         self.subplots = subplots
 
         if sharex is None:
-            if ax is None:
+            if ax is None and by is None:
                 self.sharex = True
             else:
                 # if we get an axis, the users should do the visibility
@@ -263,18 +265,30 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
         # else:
         #     columns = data.columns
 
-        for col, values in data.items():
-            if keep_index is True:
-                yield col, values
-            else:
-                yield col, values.values
+        if not isinstance(data.columns, ABCMultiIndex):
+            for col, values in data.items():
+                if keep_index is True:
+                    yield col, values
+                else:
+                    yield col, values.values
+        else:
+            cols = data.columns.get_level_values(0).unique()
+
+            for col in cols:
+                if keep_index is True:
+                    yield col, data[col]
+                else:
+                    yield col, data[col].values
 
     @property
     def nseries(self):
         if self.data.ndim == 1:
             return 1
         else:
-            return self.data.shape[1]
+            if not isinstance(self.data.columns, ABCMultiIndex):
+                return self.data.shape[1]
+            else:
+                return len(set(self.data.columns.get_level_values(0)))
 
     def draw(self):
         self.plt.draw_if_interactive()
@@ -404,17 +418,28 @@ def _compute_plot_data(self):
 
         # GH15079 restructure data if by is defined
         if self.by is not None:
+            self.subplots = True
             grouped = data.groupby(self.by)
 
             if self.column is not None:
                 grouped = grouped[self.column]
 
-            # recreate data according to groupby object
-            data_dict = {}
-            for key, group in grouped:
-                data_dict[key] = group
-            data = DataFrame(data_dict)
+            if len(self.column) == 1:
+                # recreate data according to groupby object
+                data_dict = {}
+                for key, group in grouped:
+                    data_dict[key] = group
+                data = DataFrame(data_dict)
 
+            else:
+                l = []
+                for key, group in grouped:
+                    columns = MultiIndex.from_product([[key], self.column])
+                    group = group[self.column]
+                    group.columns = columns
+                    l.append(group)
+
+                data = concat(l, axis=1)
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
         data = data._convert(datetime=True, timedelta=True)
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index a016e3f9efe71..ecc8dfc64f284 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -12,7 +12,6 @@
 from pandas.plotting._matplotlib import converter
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
-from pandas.core.frame import DataFrame
 
 
 class HistPlot(LinePlot):
@@ -70,14 +69,12 @@ def _plot(
     @classmethod
     def _group_plot(
         cls,
-        ax,
+        axes,
         data,
-        naxes,
+        fig,
+        labels,
         rot=90,
         xrot=None,
-        sharex=False,
-        sharey=False,
-        layout=None,
         xlabelsize=None,
         ylabelsize=None,
         yrot=None,
@@ -92,19 +89,11 @@ def _group_plot(
 
         converter._WARN = False  # no warning for pandas plots
         xrot = xrot or rot
-        fig, axes = _subplots(
-            naxes=naxes,
-            ax=ax,
-            squeeze=False,
-            sharex=sharex,
-            sharey=sharey,
-            layout=layout,
-        )
 
-        _axes = _flatten(axes)
         for i, (label, y) in enumerate(data):
-            ax = _axes[i]
-            ax.hist(y, **kwds)
+            ax = axes[i]
+            # TODO: now df.hist also has no value for this
+            ax.hist(y, label=labels, **kwds)
             ax.set_title(pprint_thing(label))
 
         _set_ticks_props(
@@ -112,7 +101,7 @@ def _group_plot(
         )
 
         fig.subplots_adjust(
-            bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3
+            bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.8, wspace=0.3
         )
         return axes
 
@@ -139,11 +128,10 @@ def _make_plot(self):
                 self._add_legend_handle(artists[0], label, index=i)
 
         else:
-            naxes = len(list(self._iter_data()))
             data = self._iter_data()
             kwds = self.kwds.copy()
             kwds = self._make_plot_keywords(kwds, None)
-            self._group_plot(self._get_ax(0), data, naxes, **kwds)
+            self._group_plot(self.axes, data, self.fig, self.column, **kwds)
 
     def _make_plot_keywords(self, kwds, y):
         """merge BoxPlot/KdePlot properties to passed kwds"""

From 5abedb6ba7d87cd40bf67b99d20b4658c0bfe6c2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 13 Sep 2019 16:59:56 +0200
Subject: [PATCH 024/142] Calculate hist bins per group when by is given

---
 pandas/plotting/_matplotlib/core.py |  6 ++--
 pandas/plotting/_matplotlib/hist.py | 45 ++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index d3d5f307e7b01..df9143061765b 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -432,14 +432,14 @@ def _compute_plot_data(self):
                 data = DataFrame(data_dict)
 
             else:
-                l = []
+                data_list = []
                 for key, group in grouped:
                     columns = MultiIndex.from_product([[key], self.column])
                     group = group[self.column]
                     group.columns = columns
-                    l.append(group)
+                    data_list.append(group)
 
-                data = concat(l, axis=1)
+                data = concat(data_list, axis=1)
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
         data = data._convert(datetime=True, timedelta=True)
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index ecc8dfc64f284..0c46a1f703f84 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -26,23 +26,35 @@ def __init__(self, data, bins=10, bottom=0, **kwargs):
         MPLPlot.__init__(self, data, **kwargs)
 
     def _args_adjust(self):
-        if self.by is None:
-            if is_integer(self.bins):
-                # create common bin edge
-                values = self.data._convert(datetime=True)._get_numeric_data()
-                values = np.ravel(values)
-                values = values[~isna(values)]
-
-                hist, self.bins = np.histogram(
-                    values,
-                    bins=self.bins,
-                    range=self.kwds.get("range", None),
-                    weights=self.kwds.get("weights", None),
-                )
+        if is_integer(self.bins):
+            if self.by is None:
+                self.bins = self._caculcate_bins(self.data)
+
+            else:
+                grouped = self.data.groupby(self.by)[self.column]
+                bins_list = []
+                for key, group in grouped:
+                    print(key)
+                    print(group)
+                    bins_list.append(self._caculcate_bins(group))
+                self.bins = bins_list
 
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 
+    def _caculcate_bins(self, data):
+        values = data._convert(datetime=True)._get_numeric_data()
+        values = np.ravel(values)
+        values = values[~isna(values)]
+
+        hist, bins = np.histogram(
+            values,
+            bins=self.bins,
+            range=self.kwds.get("range", None),
+            weights=self.kwds.get("weights", None),
+        )
+        return bins
+
     @classmethod
     def _plot(
         cls,
@@ -73,6 +85,7 @@ def _group_plot(
         data,
         fig,
         labels,
+        bins=None,
         rot=90,
         xrot=None,
         xlabelsize=None,
@@ -92,8 +105,7 @@ def _group_plot(
 
         for i, (label, y) in enumerate(data):
             ax = axes[i]
-            # TODO: now df.hist also has no value for this
-            ax.hist(y, label=labels, **kwds)
+            ax.hist(y, bins[i], label=labels, **kwds)
             ax.set_title(pprint_thing(label))
 
         _set_ticks_props(
@@ -108,7 +120,6 @@ def _group_plot(
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
-
         if self.by is None:
             for i, (label, y) in enumerate(self._iter_data()):
                 ax = self._get_ax(i)
@@ -128,9 +139,9 @@ def _make_plot(self):
                 self._add_legend_handle(artists[0], label, index=i)
 
         else:
-            data = self._iter_data()
             kwds = self.kwds.copy()
             kwds = self._make_plot_keywords(kwds, None)
+            data = self._iter_data()
             self._group_plot(self.axes, data, self.fig, self.column, **kwds)
 
     def _make_plot_keywords(self, kwds, y):

From d73115a3ff683ab22a6b09bfc7c2010d6655980e Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 13 Sep 2019 17:05:36 +0200
Subject: [PATCH 025/142] Add in-code comments

---
 pandas/plotting/_matplotlib/core.py | 6 ++++++
 pandas/plotting/_matplotlib/hist.py | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index df9143061765b..74687f1eff26d 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -128,6 +128,8 @@ def __init__(
         self.subplots = subplots
 
         if sharex is None:
+
+            # if by is defined, subplots are used and sharex should be False
             if ax is None and by is None:
                 self.sharex = True
             else:
@@ -285,6 +287,9 @@ def nseries(self):
         if self.data.ndim == 1:
             return 1
         else:
+
+            # If MultiIndex column, only return the first level which
+            # corresponds to by argument
             if not isinstance(self.data.columns, ABCMultiIndex):
                 return self.data.shape[1]
             else:
@@ -440,6 +445,7 @@ def _compute_plot_data(self):
                     data_list.append(group)
 
                 data = concat(data_list, axis=1)
+
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
         data = data._convert(datetime=True, timedelta=True)
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 0c46a1f703f84..49350e1bac2c8 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -26,6 +26,9 @@ def __init__(self, data, bins=10, bottom=0, **kwargs):
         MPLPlot.__init__(self, data, **kwargs)
 
     def _args_adjust(self):
+
+        # calculate bin number separately in different subplots
+        # where subplots are created based on by argument
         if is_integer(self.bins):
             if self.by is None:
                 self.bins = self._caculcate_bins(self.data)
@@ -43,6 +46,8 @@ def _args_adjust(self):
             self.bottom = np.array(self.bottom)
 
     def _caculcate_bins(self, data):
+        """Calculate bins given data"""
+
         values = data._convert(datetime=True)._get_numeric_data()
         values = np.ravel(values)
         values = values[~isna(values)]

From d7998bb515d267f8ba8ce7825bed358a43b1f8d9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 13 Sep 2019 17:07:35 +0200
Subject: [PATCH 026/142] remove print

---
 pandas/plotting/_matplotlib/hist.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 49350e1bac2c8..0e39c9e61467a 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -37,8 +37,6 @@ def _args_adjust(self):
                 grouped = self.data.groupby(self.by)[self.column]
                 bins_list = []
                 for key, group in grouped:
-                    print(key)
-                    print(group)
                     bins_list.append(self._caculcate_bins(group))
                 self.bins = bins_list
 

From 1bbf7ea8bbbf656e0dc1f3c42f29521f20429a81 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 13 Sep 2019 19:41:13 +0200
Subject: [PATCH 027/142] reformat

---
 pandas/tests/plotting/test_frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index cdb24ab1b2987..f623aad310319 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3231,10 +3231,10 @@ def test_subplots_sharex_false(self):
 
     def test_hist_plot_by_argument(self):
         # GH 15079
-        df = DataFrame(np.random.randn(30, 2), columns=['A', 'B'])
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
 
-        _check_plot_works(df.plot.hist, column='A', by='C')
+        _check_plot_works(df.plot.hist, column="A", by="C")
 
     def test_plot_no_rows(self):
         # GH 27758

From a279f45fa680155febd2aeecc4446fef61470659 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 13 Sep 2019 19:43:44 +0200
Subject: [PATCH 028/142] Drop NaN values before plotting grouped hist

---
 pandas/plotting/_matplotlib/hist.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 0e39c9e61467a..9474366eae4aa 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -108,6 +108,7 @@ def _group_plot(
 
         for i, (label, y) in enumerate(data):
             ax = axes[i]
+            y = y[~isna(y)]
             ax.hist(y, bins[i], label=labels, **kwds)
             ax.set_title(pprint_thing(label))
 

From 2b793eaf0571e7012aff5fe0df123dbdcac30999 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 14 Sep 2019 18:56:57 +0200
Subject: [PATCH 029/142] Add isna for multi column

---
 pandas/plotting/_matplotlib/hist.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 9474366eae4aa..fc9dc718e0929 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -20,7 +20,6 @@ class HistPlot(LinePlot):
     def __init__(self, data, bins=10, bottom=0, **kwargs):
         self.bins = bins  # use mpl default
         self.bottom = bottom
-        self.by = kwargs.get("by")
 
         # Do not call LinePlot.__init__ which may fill nan
         MPLPlot.__init__(self, data, **kwargs)
@@ -108,8 +107,11 @@ def _group_plot(
 
         for i, (label, y) in enumerate(data):
             ax = axes[i]
-            y = y[~isna(y)]
-            ax.hist(y, bins[i], label=labels, **kwds)
+            if len(y.shape) > 1:
+                y_notna = np.array(col[~isna(col)] for col in y.T).T
+            else:
+                y_notna = y[~isna(y)]
+            ax.hist(y_notna, bins[i], label=labels, **kwds)
             ax.set_title(pprint_thing(label))
 
         _set_ticks_props(

From 04de066e99565e8ae657227fde1026e3cacd1b2c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Sep 2019 22:20:13 +0200
Subject: [PATCH 030/142] try to remove warning

---
 pandas/plotting/_matplotlib/hist.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index fc9dc718e0929..62d27a5d7509f 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -102,13 +102,13 @@ def _group_plot(
                 "will be created"
             )
 
-        converter._WARN = False  # no warning for pandas plots
         xrot = xrot or rot
 
         for i, (label, y) in enumerate(data):
             ax = axes[i]
             if len(y.shape) > 1:
-                y_notna = np.array(col[~isna(col)] for col in y.T).T
+                notna = [col[~isna(col)] for col in y.T]
+                y_notna = np.array(np.array(notna).T)
             else:
                 y_notna = y[~isna(y)]
             ax.hist(y_notna, bins[i], label=labels, **kwds)

From 4adc3240daf0d0e588a18a7aab68de11cee407e1 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 16 Sep 2019 09:24:20 +0200
Subject: [PATCH 031/142] test if removing pd works

---
 pandas/tests/plotting/test_frame.py | 40 ++++++++++++++---------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index f623aad310319..3194859e428f1 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -582,7 +582,7 @@ def test_subplots_timeseries_y_axis_not_supported(self):
                 pd.to_datetime("2017-08-02 00:00:00"),
             ],
         }
-        testdata = pd.DataFrame(data)
+        testdata = DataFrame(data)
         ax_period = testdata.plot(x="numeric", y="period")
         assert (
             ax_period.get_lines()[0].get_data()[1] == testdata["period"].values
@@ -952,7 +952,7 @@ def test_bar_colors(self):
         tm.close()
 
     def test_bar_user_colors(self):
-        df = pd.DataFrame(
+        df = DataFrame(
             {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]}
         )
         # This should *only* work when `y` is specified, else
@@ -1114,13 +1114,13 @@ def test_bar_nan(self):
     @pytest.mark.slow
     def test_bar_categorical(self):
         # GH 13019
-        df1 = pd.DataFrame(
+        df1 = DataFrame(
             np.random.randn(6, 5),
             index=pd.Index(list("ABCDEF")),
             columns=pd.Index(list("abcde")),
         )
         # categorical index must behave the same
-        df2 = pd.DataFrame(
+        df2 = DataFrame(
             np.random.randn(6, 5),
             index=pd.CategoricalIndex(list("ABCDEF")),
             columns=pd.CategoricalIndex(list("abcde")),
@@ -1167,7 +1167,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
         # interfere with x-axis label and ticklabels with
         # ipython inline backend.
         random_array = np.random.random((1000, 3))
-        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
+        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
 
         ax1 = df.plot.scatter(x="A label", y="B label")
         ax2 = df.plot.scatter(x="A label", y="B label", c="C label")
@@ -1190,7 +1190,7 @@ def test_if_hexbin_xaxis_label_is_visible(self):
         # interfere with x-axis label and ticklabels with
         # ipython inline backend.
         random_array = np.random.random((1000, 3))
-        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
+        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
 
         ax = df.plot.hexbin("A label", "B label", gridsize=12)
         assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels())
@@ -1202,7 +1202,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
         import matplotlib.pyplot as plt
 
         random_array = np.random.random((1000, 3))
-        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
+        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
 
         fig, axes = plt.subplots(1, 2)
         df.plot.scatter("A label", "B label", c="C label", ax=axes[0])
@@ -1218,7 +1218,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
     @pytest.mark.slow
     def test_plot_scatter_with_categorical_data(self):
         # GH 16199
-        df = pd.DataFrame(
+        df = DataFrame(
             {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}
         )
 
@@ -1883,7 +1883,7 @@ def test_df_legend_labels(self):
 
     def test_missing_marker_multi_plots_on_same_ax(self):
         # GH 18222
-        df = pd.DataFrame(
+        df = DataFrame(
             data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]
         )
         fig, ax = self.plt.subplots(nrows=1, ncols=3)
@@ -2023,7 +2023,7 @@ def test_line_colors(self):
     @pytest.mark.slow
     def test_dont_modify_colors(self):
         colors = ["r", "g", "b"]
-        pd.DataFrame(np.random.rand(10, 2)).plot(color=colors)
+        DataFrame(np.random.rand(10, 2)).plot(color=colors)
         assert len(colors) == 3
 
     @pytest.mark.slow
@@ -3114,7 +3114,7 @@ def test_passed_bar_colors(self):
 
         color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
         colormap = mpl.colors.ListedColormap(color_tuples)
-        barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap)
+        barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap)
         assert color_tuples == [c.get_facecolor() for c in barplot.patches]
 
     def test_rcParams_bar_colors(self):
@@ -3122,14 +3122,14 @@ def test_rcParams_bar_colors(self):
 
         color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
         with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}):
-            barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar")
+            barplot = DataFrame([[1, 2, 3]]).plot(kind="bar")
         assert color_tuples == [c.get_facecolor() for c in barplot.patches]
 
     @pytest.mark.parametrize("method", ["line", "barh", "bar"])
     def test_secondary_axis_font_size(self, method):
         # GH: 12565
         df = (
-            pd.DataFrame(np.random.randn(15, 2), columns=list("AB"))
+            DataFrame(np.random.randn(15, 2), columns=list("AB"))
             .assign(C=lambda df: df.B.cumsum())
             .assign(D=lambda df: df.C * 1.1)
         )
@@ -3145,7 +3145,7 @@ def test_secondary_axis_font_size(self, method):
     def test_x_string_values_ticks(self):
         # Test if string plot index have a fixed xtick position
         # GH: 7612, GH: 22334
-        df = pd.DataFrame(
+        df = DataFrame(
             {
                 "sales": [3, 2, 3],
                 "visits": [20, 42, 28],
@@ -3166,7 +3166,7 @@ def test_x_multiindex_values_ticks(self):
         # Test if multiindex plot index have a fixed xtick position
         # GH: 15912
         index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]])
-        df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index)
+        df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index)
         ax = df.plot()
         ax.set_xlim(-1, 4)
         xticklabels = [t.get_text() for t in ax.get_xticklabels()]
@@ -3181,7 +3181,7 @@ def test_x_multiindex_values_ticks(self):
     def test_xlim_plot_line(self, kind):
         # test if xlim is set correctly in plot.line and plot.area
         # GH 27686
-        df = pd.DataFrame([2, 4], index=[1, 2])
+        df = DataFrame([2, 4], index=[1, 2])
         ax = df.plot(kind=kind)
         xlims = ax.get_xlim()
         assert xlims[0] < 1
@@ -3193,7 +3193,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
         fig, ax = self.plt.subplots()
 
         indexes = ["k1", "k2", "k3", "k4"]
-        df = pd.DataFrame(
+        df = DataFrame(
             {
                 "s1": [1000, 2000, 1500, 2000],
                 "s2": [900, 1400, 2000, 3000],
@@ -3216,7 +3216,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
     def test_subplots_sharex_false(self):
         # test when sharex is set to False, two plots should have different
         # labels, GH 25160
-        df = pd.DataFrame(np.random.rand(10, 2))
+        df = DataFrame(np.random.rand(10, 2))
         df.iloc[5:, 1] = np.nan
         df.iloc[:5, 0] = np.nan
 
@@ -3238,7 +3238,7 @@ def test_hist_plot_by_argument(self):
 
     def test_plot_no_rows(self):
         # GH 27758
-        df = pd.DataFrame(columns=["foo"], dtype=int)
+        df = DataFrame(columns=["foo"], dtype=int)
         assert df.empty
         ax = df.plot()
         assert len(ax.get_lines()) == 1
@@ -3247,7 +3247,7 @@ def test_plot_no_rows(self):
         assert len(line.get_ydata()) == 0
 
     def test_plot_no_numeric_data(self):
-        df = pd.DataFrame(["a", "b", "c"])
+        df = DataFrame(["a", "b", "c"])
         with pytest.raises(TypeError):
             df.plot()
 

From d0103a4b5fc227e325b9d952b8a3ff8f1fb5e4e3 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 16 Sep 2019 09:54:15 +0200
Subject: [PATCH 032/142] revert changes

---
 pandas/tests/plotting/test_frame.py | 40 ++++++++++++++---------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 3194859e428f1..f623aad310319 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -582,7 +582,7 @@ def test_subplots_timeseries_y_axis_not_supported(self):
                 pd.to_datetime("2017-08-02 00:00:00"),
             ],
         }
-        testdata = DataFrame(data)
+        testdata = pd.DataFrame(data)
         ax_period = testdata.plot(x="numeric", y="period")
         assert (
             ax_period.get_lines()[0].get_data()[1] == testdata["period"].values
@@ -952,7 +952,7 @@ def test_bar_colors(self):
         tm.close()
 
     def test_bar_user_colors(self):
-        df = DataFrame(
+        df = pd.DataFrame(
             {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]}
         )
         # This should *only* work when `y` is specified, else
@@ -1114,13 +1114,13 @@ def test_bar_nan(self):
     @pytest.mark.slow
     def test_bar_categorical(self):
         # GH 13019
-        df1 = DataFrame(
+        df1 = pd.DataFrame(
             np.random.randn(6, 5),
             index=pd.Index(list("ABCDEF")),
             columns=pd.Index(list("abcde")),
         )
         # categorical index must behave the same
-        df2 = DataFrame(
+        df2 = pd.DataFrame(
             np.random.randn(6, 5),
             index=pd.CategoricalIndex(list("ABCDEF")),
             columns=pd.CategoricalIndex(list("abcde")),
@@ -1167,7 +1167,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
         # interfere with x-axis label and ticklabels with
         # ipython inline backend.
         random_array = np.random.random((1000, 3))
-        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
+        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
 
         ax1 = df.plot.scatter(x="A label", y="B label")
         ax2 = df.plot.scatter(x="A label", y="B label", c="C label")
@@ -1190,7 +1190,7 @@ def test_if_hexbin_xaxis_label_is_visible(self):
         # interfere with x-axis label and ticklabels with
         # ipython inline backend.
         random_array = np.random.random((1000, 3))
-        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
+        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
 
         ax = df.plot.hexbin("A label", "B label", gridsize=12)
         assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels())
@@ -1202,7 +1202,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
         import matplotlib.pyplot as plt
 
         random_array = np.random.random((1000, 3))
-        df = DataFrame(random_array, columns=["A label", "B label", "C label"])
+        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
 
         fig, axes = plt.subplots(1, 2)
         df.plot.scatter("A label", "B label", c="C label", ax=axes[0])
@@ -1218,7 +1218,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
     @pytest.mark.slow
     def test_plot_scatter_with_categorical_data(self):
         # GH 16199
-        df = DataFrame(
+        df = pd.DataFrame(
             {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}
         )
 
@@ -1883,7 +1883,7 @@ def test_df_legend_labels(self):
 
     def test_missing_marker_multi_plots_on_same_ax(self):
         # GH 18222
-        df = DataFrame(
+        df = pd.DataFrame(
             data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]
         )
         fig, ax = self.plt.subplots(nrows=1, ncols=3)
@@ -2023,7 +2023,7 @@ def test_line_colors(self):
     @pytest.mark.slow
     def test_dont_modify_colors(self):
         colors = ["r", "g", "b"]
-        DataFrame(np.random.rand(10, 2)).plot(color=colors)
+        pd.DataFrame(np.random.rand(10, 2)).plot(color=colors)
         assert len(colors) == 3
 
     @pytest.mark.slow
@@ -3114,7 +3114,7 @@ def test_passed_bar_colors(self):
 
         color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
         colormap = mpl.colors.ListedColormap(color_tuples)
-        barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap)
+        barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap)
         assert color_tuples == [c.get_facecolor() for c in barplot.patches]
 
     def test_rcParams_bar_colors(self):
@@ -3122,14 +3122,14 @@ def test_rcParams_bar_colors(self):
 
         color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
         with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}):
-            barplot = DataFrame([[1, 2, 3]]).plot(kind="bar")
+            barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar")
         assert color_tuples == [c.get_facecolor() for c in barplot.patches]
 
     @pytest.mark.parametrize("method", ["line", "barh", "bar"])
     def test_secondary_axis_font_size(self, method):
         # GH: 12565
         df = (
-            DataFrame(np.random.randn(15, 2), columns=list("AB"))
+            pd.DataFrame(np.random.randn(15, 2), columns=list("AB"))
             .assign(C=lambda df: df.B.cumsum())
             .assign(D=lambda df: df.C * 1.1)
         )
@@ -3145,7 +3145,7 @@ def test_secondary_axis_font_size(self, method):
     def test_x_string_values_ticks(self):
         # Test if string plot index have a fixed xtick position
         # GH: 7612, GH: 22334
-        df = DataFrame(
+        df = pd.DataFrame(
             {
                 "sales": [3, 2, 3],
                 "visits": [20, 42, 28],
@@ -3166,7 +3166,7 @@ def test_x_multiindex_values_ticks(self):
         # Test if multiindex plot index have a fixed xtick position
         # GH: 15912
         index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]])
-        df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index)
+        df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index)
         ax = df.plot()
         ax.set_xlim(-1, 4)
         xticklabels = [t.get_text() for t in ax.get_xticklabels()]
@@ -3181,7 +3181,7 @@ def test_x_multiindex_values_ticks(self):
     def test_xlim_plot_line(self, kind):
         # test if xlim is set correctly in plot.line and plot.area
         # GH 27686
-        df = DataFrame([2, 4], index=[1, 2])
+        df = pd.DataFrame([2, 4], index=[1, 2])
         ax = df.plot(kind=kind)
         xlims = ax.get_xlim()
         assert xlims[0] < 1
@@ -3193,7 +3193,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
         fig, ax = self.plt.subplots()
 
         indexes = ["k1", "k2", "k3", "k4"]
-        df = DataFrame(
+        df = pd.DataFrame(
             {
                 "s1": [1000, 2000, 1500, 2000],
                 "s2": [900, 1400, 2000, 3000],
@@ -3216,7 +3216,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
     def test_subplots_sharex_false(self):
         # test when sharex is set to False, two plots should have different
         # labels, GH 25160
-        df = DataFrame(np.random.rand(10, 2))
+        df = pd.DataFrame(np.random.rand(10, 2))
         df.iloc[5:, 1] = np.nan
         df.iloc[:5, 0] = np.nan
 
@@ -3238,7 +3238,7 @@ def test_hist_plot_by_argument(self):
 
     def test_plot_no_rows(self):
         # GH 27758
-        df = DataFrame(columns=["foo"], dtype=int)
+        df = pd.DataFrame(columns=["foo"], dtype=int)
         assert df.empty
         ax = df.plot()
         assert len(ax.get_lines()) == 1
@@ -3247,7 +3247,7 @@ def test_plot_no_rows(self):
         assert len(line.get_ydata()) == 0
 
     def test_plot_no_numeric_data(self):
-        df = DataFrame(["a", "b", "c"])
+        df = pd.DataFrame(["a", "b", "c"])
         with pytest.raises(TypeError):
             df.plot()
 

From f94dbb45e119824b025536074353daa20758161f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 16 Sep 2019 18:59:26 +0200
Subject: [PATCH 033/142] check whether the warning is gone

---
 pandas/tests/plotting/test_frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index f623aad310319..5631addf4235f 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3231,7 +3231,7 @@ def test_subplots_sharex_false(self):
 
     def test_hist_plot_by_argument(self):
         # GH 15079
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df = pd.DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
 
         _check_plot_works(df.plot.hist, column="A", by="C")

From 0415cb0a0662494aee61bd94979a96c18fac1c5a Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 17 Sep 2019 10:26:33 +0200
Subject: [PATCH 034/142] try again: import concat from pandas.core.reshape.concat

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 pandas/tests/plotting/test_frame.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 74687f1eff26d..3ad33ecd9e179 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -25,10 +25,10 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas import concat
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.index import MultiIndex
+from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib import converter
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 5631addf4235f..f623aad310319 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3231,7 +3231,7 @@ def test_subplots_sharex_false(self):
 
     def test_hist_plot_by_argument(self):
         # GH 15079
-        df = pd.DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
 
         _check_plot_works(df.plot.hist, column="A", by="C")

From c00588001be47262ed9bcb22d9307d9b28f8e25e Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 12:49:28 +0100
Subject: [PATCH 035/142] fix conflict and merge master

---
 pandas/plotting/_matplotlib/hist.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 370b362f77fb5..0e60ca97758b9 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -36,18 +36,6 @@ def _args_adjust(self):
                     bins_list.append(self._caculcate_bins(group))
                 self.bins = bins_list
 
-            # create common bin edge
-            values = self.data._convert(datetime=True)._get_numeric_data()
-            values = np.ravel(values)
-            values = values[~isna(values)]
-
-            _, self.bins = np.histogram(
-                values,
-                bins=self.bins,
-                range=self.kwds.get("range", None),
-                weights=self.kwds.get("weights", None),
-            )
-
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 

From a1fabc513d6c03fcb57b5033c08b144794557514 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 13:39:53 +0100
Subject: [PATCH 036/142] Fix linting error

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 pandas/plotting/_matplotlib/hist.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 06c2c69bd3f0c..97ff2f4a2b1d7 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -106,7 +106,7 @@ def __init__(
         layout=None,
         include_bool=False,
         column=None,
-        **kwds
+        **kwds,
     ):
 
         import matplotlib.pyplot as plt
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 0e60ca97758b9..d0d98e897c91a 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -90,7 +90,7 @@ def _group_plot(
         xlabelsize=None,
         ylabelsize=None,
         yrot=None,
-        **kwds
+        **kwds,
     ):
         if "figure" in kwds:
             raise ValueError(

From 70453f13b72c9170f5c6a19fcccb78f42f8025a9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 14:31:54 +0100
Subject: [PATCH 037/142] Add test

---
 pandas/tests/plotting/test_frame.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index d2f7aeca6cd73..ebf259b563261 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3256,12 +3256,13 @@ def test_subplots_sharex_false(self):
         tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1)
         tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
 
-    def test_hist_plot_by_argument(self):
+    @pytest.mark.parametrize("column", ["A", ["A", "B"]])
+    def test_hist_plot_by_argument(self, column):
         # GH 15079
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
 
-        _check_plot_works(df.plot.hist, column="A", by="C")
+        _check_plot_works(df.plot.hist, column=column, by="C")
 
     def test_plot_no_rows(self):
         # GH 27758

From b6579a56c21b2e4b8ca0c903813f9dd15c0eb126 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 14:41:13 +0100
Subject: [PATCH 038/142] remove unused code

---
 pandas/plotting/_matplotlib/core.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 97ff2f4a2b1d7..51b3a735ac9fa 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -248,12 +248,6 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
         if fillna is not None:
             data = data.fillna(fillna)
 
-        # TODO: unused?
-        # if self.sort_columns:
-        #     columns = com.try_sort(data.columns)
-        # else:
-        #     columns = data.columns
-
         if not isinstance(data.columns, ABCMultiIndex):
             for col, values in data.items():
                 if keep_index is True:

From e99f3dc685df32f7a610b909f3adc03b6255012d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 16:53:35 +0100
Subject: [PATCH 039/142] add test and make code more robust

---
 pandas/plotting/_matplotlib/core.py | 9 ++++++---
 pandas/tests/plotting/test_frame.py | 6 ++++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 51b3a735ac9fa..c7131c0868a98 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -248,7 +248,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
         if fillna is not None:
             data = data.fillna(fillna)
 
-        if not isinstance(data.columns, ABCMultiIndex):
+        if self.by is None:
             for col, values in data.items():
                 if keep_index is True:
                     yield col, values
@@ -258,10 +258,11 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
             cols = data.columns.get_level_values(0).unique()
 
             for col in cols:
+                mask = data.columns.get_level_values(0) == col
                 if keep_index is True:
-                    yield col, data[col]
+                    yield col, data.loc[:, mask]
                 else:
-                    yield col, data[col].values
+                    yield col, data.loc[:, mask].values
 
     @property
     def nseries(self):
@@ -420,6 +421,8 @@ def _compute_plot_data(self):
                 data_list = []
                 for key, group in grouped:
                     columns = MultiIndex.from_product([[key], self.column])
+                    #                    columns = MultiIndex([tuple([c for c in col]) for col in columns])
+
                     group = group[self.column]
                     group.columns = columns
                     data_list.append(group)
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index ebf259b563261..8df4a16f73646 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3256,13 +3256,15 @@ def test_subplots_sharex_false(self):
         tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1)
         tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
 
+    @pytest.mark.parametrize("by", ["C", ["C", "D"]])
     @pytest.mark.parametrize("column", ["A", ["A", "B"]])
-    def test_hist_plot_by_argument(self, column):
+    def test_hist_plot_by_argument(self, by, column):
         # GH 15079
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
+        df["D"] = np.random.choice(["a", "b", "c"], 30)
 
-        _check_plot_works(df.plot.hist, column=column, by="C")
+        _check_plot_works(df.plot.hist, column=column, by=by)
 
     def test_plot_no_rows(self):
         # GH 27758

From 99d6d67316d0432cdaedcaaecc683c638ca95ee4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 16:54:56 +0100
Subject: [PATCH 040/142] remove comment

---
 pandas/plotting/_matplotlib/core.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index c7131c0868a98..de2d5e20216fc 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -421,8 +421,6 @@ def _compute_plot_data(self):
                 data_list = []
                 for key, group in grouped:
                     columns = MultiIndex.from_product([[key], self.column])
-                    #                    columns = MultiIndex([tuple([c for c in col]) for col in columns])
-
                     group = group[self.column]
                     group.columns = columns
                     data_list.append(group)

From 8e2fcf62daf0efda8c81ae2bcc8150228a7a3bd5 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 16:59:53 +0100
Subject: [PATCH 041/142] clean the code

---
 pandas/plotting/_matplotlib/core.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index de2d5e20216fc..d5ed818ba5757 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -268,14 +268,10 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
     def nseries(self):
         if self.data.ndim == 1:
             return 1
+        elif self.by is None:
+            return self.data.shape[1]
         else:
-
-            # If MultiIndex column, only return the first level which
-            # corresponds to by argument
-            if not isinstance(self.data.columns, ABCMultiIndex):
-                return self.data.shape[1]
-            else:
-                return len(set(self.data.columns.get_level_values(0)))
+            return len(set(self.data.columns.get_level_values(0)))
 
     def draw(self):
         self.plt.draw_if_interactive()

From d02f4ac35887b2af4482ffd3d6a0f72842f5197f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 17:17:51 +0100
Subject: [PATCH 042/142] simplify code

---
 pandas/plotting/_matplotlib/core.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index d5ed818ba5757..c6b70bbfcf971 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -113,6 +113,8 @@ def __init__(
 
         self.data = data
         self.by = by
+        if isinstance(column, str):
+            column = [column]
         self.column = column
 
         self.kind = kind
@@ -403,16 +405,12 @@ def _compute_plot_data(self):
             self.subplots = True
             grouped = data.groupby(self.by)
 
-            if self.column is not None:
-                grouped = grouped[self.column]
-
             if len(self.column) == 1:
                 # recreate data according to groupby object
                 data_dict = {}
                 for key, group in grouped:
-                    data_dict[key] = group
+                    data_dict[key] = group[self.column[0]]
                 data = DataFrame(data_dict)
-
             else:
                 data_list = []
                 for key, group in grouped:

From 947189c5b8a3903641f3598a0b9176aea42900b4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 17:29:29 +0100
Subject: [PATCH 043/142] simplify code

---
 pandas/plotting/_matplotlib/core.py | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index c6b70bbfcf971..b468fb5bf1071 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -405,21 +405,14 @@ def _compute_plot_data(self):
             self.subplots = True
             grouped = data.groupby(self.by)
 
-            if len(self.column) == 1:
-                # recreate data according to groupby object
-                data_dict = {}
-                for key, group in grouped:
-                    data_dict[key] = group[self.column[0]]
-                data = DataFrame(data_dict)
-            else:
-                data_list = []
-                for key, group in grouped:
-                    columns = MultiIndex.from_product([[key], self.column])
-                    group = group[self.column]
-                    group.columns = columns
-                    data_list.append(group)
-
-                data = concat(data_list, axis=1)
+            data_list = []
+            for key, group in grouped:
+                columns = MultiIndex.from_product([[key], self.column])
+                group = group[self.column]
+                group.columns = columns
+                data_list.append(group)
+
+            data = concat(data_list, axis=1)
 
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``

From 6b5203d40cb4f0bad503c2aad7c8b3a148b8c33c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 18:05:05 +0100
Subject: [PATCH 044/142] fix linting

---
 pandas/plotting/_matplotlib/core.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index b468fb5bf1071..cb5e544ee7218 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -24,7 +24,6 @@
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.common as com
-from pandas.core.frame import DataFrame
 from pandas.core.index import MultiIndex
 from pandas.core.reshape.concat import concat
 

From 27d0d214e1fe9e84a24cbae403186ce82d12ad30 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 19:32:07 +0100
Subject: [PATCH 045/142] Add doc for hist

---
 pandas/plotting/_core.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index dd907457f7c32..bb8de96830147 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1131,7 +1131,7 @@ def box(self, by=None, **kwargs):
         """
         return self(kind="box", by=by, **kwargs)
 
-    def hist(self, by=None, bins=10, **kwargs):
+    def hist(self, column=None, by=None, bins=10, **kwargs):
         """
         Draw one histogram of the DataFrame's columns.
 
@@ -1142,6 +1142,8 @@ def hist(self, by=None, bins=10, **kwargs):
 
         Parameters
         ----------
+        column: str or sequence, optional
+            If passed, will be used to limit data to a subset of columns.
         by : str or sequence, optional
             Column in the DataFrame to group by.
         bins : int, default 10
@@ -1176,7 +1178,7 @@ def hist(self, by=None, bins=10, **kwargs):
             >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
         """
-        return self(kind="hist", by=by, bins=bins, **kwargs)
+        return self(kind="hist", column=column, by=by, bins=bins, **kwargs)
 
     def kde(self, bw_method=None, ind=None, **kwargs):
         """

From 48ff52120ee833d991423b2fe17e579f90ccb0c4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 19:59:18 +0100
Subject: [PATCH 046/142] revert change

---
 pandas/plotting/_core.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index bb8de96830147..dd907457f7c32 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1131,7 +1131,7 @@ def box(self, by=None, **kwargs):
         """
         return self(kind="box", by=by, **kwargs)
 
-    def hist(self, column=None, by=None, bins=10, **kwargs):
+    def hist(self, by=None, bins=10, **kwargs):
         """
         Draw one histogram of the DataFrame's columns.
 
@@ -1142,8 +1142,6 @@ def hist(self, column=None, by=None, bins=10, **kwargs):
 
         Parameters
         ----------
-        column: str or sequence, optional
-            If passed, will be used to limit data to a subset of columns.
         by : str or sequence, optional
             Column in the DataFrame to group by.
         bins : int, default 10
@@ -1178,7 +1176,7 @@ def hist(self, column=None, by=None, bins=10, **kwargs):
             >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
         """
-        return self(kind="hist", column=column, by=by, bins=bins, **kwargs)
+        return self(kind="hist", by=by, bins=bins, **kwargs)
 
     def kde(self, bw_method=None, ind=None, **kwargs):
         """

From f39d948d0736f3679d87e1bce7148b1fbd0ea2ed Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 20:01:53 +0100
Subject: [PATCH 047/142] fix warning

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index cb5e544ee7218..bade4809d5594 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -24,7 +24,7 @@
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.common as com
-from pandas.core.index import MultiIndex
+from pandas import MultiIndex
 from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.printing import pprint_thing

From 5d1705c6d9515749dc9b0fc53874d1d9cf4889f2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Jan 2020 20:02:36 +0100
Subject: [PATCH 048/142] isort

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index bade4809d5594..bbb38ec320c0e 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -23,8 +23,8 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-import pandas.core.common as com
 from pandas import MultiIndex
+import pandas.core.common as com
 from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.printing import pprint_thing

From 46a803162b4e6fddbbf16c8554f280a9d3cf883b Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 11 Jan 2020 14:01:17 +0100
Subject: [PATCH 049/142] simplify code

---
 pandas/plotting/_matplotlib/hist.py | 91 ++++++++++-------------------
 1 file changed, 30 insertions(+), 61 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index d0d98e897c91a..3ddda5362a798 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -68,7 +68,6 @@ def _plot(
     ):
         if column_num == 0:
             cls._initialize_stacker(ax, stacking_id, len(bins) - 1)
-        y = y[~isna(y)]
 
         base = np.zeros(len(bins) - 1)
         bottom = bottom + cls._get_stacked_values(ax, stacking_id, base, kwds["label"])
@@ -77,75 +76,45 @@ def _plot(
         cls._update_stacker(ax, stacking_id, n)
         return patches
 
-    @classmethod
-    def _group_plot(
-        cls,
-        axes,
-        data,
-        fig,
-        labels,
-        bins=None,
-        rot=90,
-        xrot=None,
-        xlabelsize=None,
-        ylabelsize=None,
-        yrot=None,
-        **kwds,
-    ):
-        if "figure" in kwds:
-            raise ValueError(
-                "Cannot pass 'figure' when using the "
-                "'by' argument, since a new 'Figure' instance "
-                "will be created"
-            )
-
-        xrot = xrot or rot
+    def _make_plot(self):
+        colors = self._get_colors()
+        stacking_id = self._get_stacking_id()
+        for i, (label, y) in enumerate(self._iter_data()):
+            ax = self._get_ax(i)
 
-        for i, (label, y) in enumerate(data):
-            ax = axes[i]
-            if len(y.shape) > 1:
-                notna = [col[~isna(col)] for col in y.T]
-                y_notna = np.array(np.array(notna).T)
-            else:
-                y_notna = y[~isna(y)]
-            ax.hist(y_notna, bins[i], label=labels, **kwds)
-            ax.set_title(pprint_thing(label))
+            kwds = self.kwds.copy()
+            label = pprint_thing(label)
+            kwds["label"] = label
 
-        _set_ticks_props(
-            axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
-        )
+            style, kwds = self._apply_style_colors(colors, kwds, i, label)
+            if style is not None:
+                kwds["style"] = style
 
-        fig.subplots_adjust(
-            bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.8, wspace=0.3
-        )
-        return axes
+            kwds = self._make_plot_keywords(kwds, y)
 
-    def _make_plot(self):
-        colors = self._get_colors()
-        stacking_id = self._get_stacking_id()
-        if self.by is None:
-            for i, (label, y) in enumerate(self._iter_data()):
-                ax = self._get_ax(i)
+            if self.by is not None:
+                kwds["bins"] = kwds["bins"][i]
+                kwds["label"] = self.column
+                kwds.pop("color")
 
-                kwds = self.kwds.copy()
-                label = pprint_thing(label)
-                kwds["label"] = label
+            y = self._reformat_y(y)
+            artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
 
-                style, kwds = self._apply_style_colors(colors, kwds, i, label)
-                if style is not None:
-                    kwds["style"] = style
+            # when by is applied, show title for subplots to know which group it is
+            if self.by is not None:
+                ax.set_title(pprint_thing(label))
 
-                kwds = self._make_plot_keywords(kwds, y)
-                artists = self._plot(
-                    ax, y, column_num=i, stacking_id=stacking_id, **kwds
-                )
-                self._add_legend_handle(artists[0], label, index=i)
+            self._add_legend_handle(artists[0], label, index=i)
 
+    def _reformat_y(self, y):
+        """Internal function to reformat y given `by` is applied or not.
+        """
+        if self.by is not None and len(y.shape) > 1:
+            notna = [col[~isna(col)] for col in y.T]
+            y = np.array(np.array(notna).T)
         else:
-            kwds = self.kwds.copy()
-            kwds = self._make_plot_keywords(kwds, None)
-            data = self._iter_data()
-            self._group_plot(self.axes, data, self.fig, self.column, **kwds)
+            y = y[~isna(y)]
+        return y
 
     def _make_plot_keywords(self, kwds, y):
         """merge BoxPlot/KdePlot properties to passed kwds"""

From 57a96e6d6e1f4281e865487f2e5946a40217add2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 11 Jan 2020 14:03:54 +0100
Subject: [PATCH 050/142] simpler python

---
 pandas/plotting/_matplotlib/core.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 43488ed2a4c52..c42f27b3712fa 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -112,9 +112,7 @@ def __init__(
 
         self.data = data
         self.by = by
-        if isinstance(column, str):
-            column = [column]
-        self.column = column
+        self.column = [column] if isinstance(column, str) else column
 
         self.kind = kind
 

From 29127f08870537a1d3d542317129f5fb8f2c959f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 11 Jan 2020 14:04:55 +0100
Subject: [PATCH 051/142] remove unused

---
 pandas/plotting/_matplotlib/hist.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 3ddda5362a798..8032d33db16f1 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -17,7 +17,6 @@ class HistPlot(LinePlot):
     def __init__(self, data, bins=10, bottom=0, **kwargs):
         self.bins = bins  # use mpl default
         self.bottom = bottom
-
         # Do not call LinePlot.__init__ which may fill nan
         MPLPlot.__init__(self, data, **kwargs)
 

From 61bb97f45bbf4ae39ec82064db5b1f7869b2b887 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 11 Jan 2020 14:06:08 +0100
Subject: [PATCH 052/142] restore blank lines

---
 pandas/plotting/_matplotlib/hist.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 8032d33db16f1..5fbfe7ea879b3 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -78,10 +78,12 @@ def _plot(
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
+
         for i, (label, y) in enumerate(self._iter_data()):
             ax = self._get_ax(i)
 
             kwds = self.kwds.copy()
+
             label = pprint_thing(label)
             kwds["label"] = label
 

From 62fb9e660fa0f377adb77ac6aac0e09c472ae254 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 11 Jan 2020 15:54:04 +0100
Subject: [PATCH 053/142] Add extensive tests

---
 pandas/plotting/_matplotlib/hist.py |  2 +
 pandas/tests/plotting/test_frame.py | 98 +++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 5fbfe7ea879b3..a014af9c846d9 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -93,6 +93,8 @@ def _make_plot(self):
 
             kwds = self._make_plot_keywords(kwds, y)
 
+            # the bins is multi-dimension array now and each plot need only 1-d and
+            # when by is applied, label should be columns that are grouped
             if self.by is not None:
                 kwds["bins"] = kwds["bins"][i]
                 kwds["label"] = self.column
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 08e200e6bba2d..d0c3d9d23ef3e 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3266,6 +3266,104 @@ def test_hist_plot_by_argument(self, by, column):
 
         _check_plot_works(df.plot.hist, column=column, by=by)
 
+    @pytest.mark.slow
+    @pytest.mark.parametrize(
+        "by, column, layout, axes_num",
+        [
+            (["C"], "A", (2, 2), 3),
+            ("C", "A", (2, 2), 3),
+            (["C"], ["A"], (1, 3), 3),
+            ("C", ["A", "B"], (3, 1), 3),
+            (["C", "D"], "A", (9, 1), 9),
+            (["C", "D"], "A", (3, 3), 9),
+            (["C", "D"], ["A"], (5, 2), 9),
+            (["C", "D"], ["A", "B"], (9, 1), 9),
+            (["C", "D"], ["A", "B"], (5, 2), 9),
+        ],
+    )
+    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
+        # GH 15079
+        np.random.randn(2020)
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df["C"] = np.random.choice(["a", "b", "c"], 30)
+        df["D"] = np.random.choice(["a", "b", "c"], 30)
+
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(df.plot.hist, column=column, by=by, layout=layout)
+        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
+
+    def test_hist_plot_invalid_layout_with_by(self):
+        # GH 15079, test if error is raised when invalid layout is given
+        np.random.randn(2020)
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df["C"] = np.random.choice(["a", "b", "c"], 30)
+        df["D"] = np.random.choice(["a", "b", "c"], 30)
+
+        # layout too small for all 3 plots
+        with pytest.raises(ValueError):
+            df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
+
+        # invalid format for layout
+        with pytest.raises(ValueError):
+            df.plot.hist(column=["A", "B"], by="C", layout=(1,))
+        with pytest.raises(ValueError):
+            df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
+
+    @pytest.mark.slow
+    def test_axis_share_x_with_by(self):
+        # GH 15079
+        np.random.randn(2020)
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df["C"] = np.random.choice(["a", "b", "c"], 30)
+        df["D"] = np.random.choice(["a", "b", "c"], 30)
+
+        ax1, ax2, ax3 = df.plot.hist(column="A", by="C", sharex=True)
+
+        # share x
+        assert ax1._shared_x_axes.joined(ax1, ax2)
+        assert ax2._shared_x_axes.joined(ax1, ax2)
+        assert ax3._shared_x_axes.joined(ax1, ax3)
+        assert ax3._shared_x_axes.joined(ax2, ax3)
+
+        # don't share y
+        assert not ax1._shared_y_axes.joined(ax1, ax2)
+        assert not ax2._shared_y_axes.joined(ax1, ax2)
+        assert not ax3._shared_y_axes.joined(ax1, ax3)
+        assert not ax3._shared_y_axes.joined(ax2, ax3)
+
+    @pytest.mark.slow
+    def test_axis_share_y_with_by(self):
+        # GH 15079
+        np.random.randn(2020)
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df["C"] = np.random.choice(["a", "b", "c"], 30)
+        df["D"] = np.random.choice(["a", "b", "c"], 30)
+
+        ax1, ax2, ax3 = df.plot.hist(column="A", by="C", sharey=True)
+
+        # share y
+        assert ax1._shared_y_axes.joined(ax1, ax2)
+        assert ax2._shared_y_axes.joined(ax1, ax2)
+        assert ax3._shared_y_axes.joined(ax1, ax3)
+        assert ax3._shared_y_axes.joined(ax2, ax3)
+
+        # don't share x
+        assert not ax1._shared_x_axes.joined(ax1, ax2)
+        assert not ax2._shared_x_axes.joined(ax1, ax2)
+        assert not ax3._shared_x_axes.joined(ax1, ax3)
+        assert not ax3._shared_x_axes.joined(ax2, ax3)
+
+    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
+    def test_figure_shape_hist_with_by(self, figsize):
+        # GH 15079
+        np.random.randn(2020)
+        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+        df["C"] = np.random.choice(["a", "b", "c"], 30)
+        df["D"] = np.random.choice(["a", "b", "c"], 30)
+
+        axes = df.plot.hist(column="A", by="C", figsize=figsize)
+        self._check_axes_shape(axes, axes_num=3, figsize=figsize)
+
     def test_plot_no_rows(self):
         # GH 27758
         df = pd.DataFrame(columns=["foo"], dtype=int)

From 638174bece1ac74fa498e0436c32076a3a68883c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 11 Jan 2020 16:18:21 +0100
Subject: [PATCH 054/142] fix seed

---
 pandas/tests/plotting/test_frame.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index d0c3d9d23ef3e..bebd915fec61d 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3283,7 +3283,7 @@ def test_hist_plot_by_argument(self, by, column):
     )
     def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
         # GH 15079
-        np.random.randn(2020)
+        np.random.seed(0)
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
         df["D"] = np.random.choice(["a", "b", "c"], 30)
@@ -3294,7 +3294,7 @@ def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
 
     def test_hist_plot_invalid_layout_with_by(self):
         # GH 15079, test if error is raised when invalid layout is given
-        np.random.randn(2020)
+        np.random.seed(0)
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
         df["D"] = np.random.choice(["a", "b", "c"], 30)
@@ -3312,7 +3312,7 @@ def test_hist_plot_invalid_layout_with_by(self):
     @pytest.mark.slow
     def test_axis_share_x_with_by(self):
         # GH 15079
-        np.random.randn(2020)
+        np.random.seed(0)
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
         df["D"] = np.random.choice(["a", "b", "c"], 30)
@@ -3334,7 +3334,7 @@ def test_axis_share_x_with_by(self):
     @pytest.mark.slow
     def test_axis_share_y_with_by(self):
         # GH 15079
-        np.random.randn(2020)
+        np.random.seed(0)
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
         df["D"] = np.random.choice(["a", "b", "c"], 30)
@@ -3356,7 +3356,6 @@ def test_axis_share_y_with_by(self):
     @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
     def test_figure_shape_hist_with_by(self, figsize):
         # GH 15079
-        np.random.randn(2020)
         df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
         df["C"] = np.random.choice(["a", "b", "c"], 30)
         df["D"] = np.random.choice(["a", "b", "c"], 30)

From 5adb25dccf30630d2caab9e0d5f878ac05a72a06 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 15 Jan 2020 09:22:28 +0100
Subject: [PATCH 055/142] code change based on reviews

---
 doc/source/whatsnew/v1.1.0.rst      |  3 ++-
 pandas/plotting/_core.py            | 10 ++++++++++
 pandas/plotting/_matplotlib/core.py | 14 +++++++-------
 pandas/plotting/_matplotlib/hist.py |  3 +--
 pandas/tests/plotting/test_frame.py | 11 ++++++++---
 5 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 721bcb0758992..327787cbebc97 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -127,7 +127,8 @@ Plotting
 ^^^^^^^^
 
 -
--
+- Implement ``by`` argument for :meth:`DataFrame.plot.hist` (:issue:`15079`)
+
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index dd907457f7c32..a65980221837b 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1175,6 +1175,16 @@ def hist(self, by=None, bins=10, **kwargs):
             ...     columns = ['one'])
             >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
+
+        .. plot::
+            :context: close-figs
+
+            >>> np.random.seed(159753)
+            >>> df = pd.DataFrame(np.random.randn(30, 2), columns=['A', 'B'])
+            >>> df['C'] = np.random.choice(['a', 'b', 'c'], 30)
+            >>> df['D'] = np.random.choice(['a', 'b', 'c'], 30)
+            >>> ax = df.plot.hist(column=['A', 'B'], by=['C'], figsize=(8, 10))
+
         """
         return self(kind="hist", by=by, bins=bins, **kwargs)
 
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index c42f27b3712fa..b0b0369f95ce6 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -248,7 +248,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
 
         if self.by is None:
             for col, values in data.items():
-                if keep_index is True:
+                if keep_index:
                     yield col, values
                 else:
                     yield col, values.values
@@ -256,11 +256,11 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
             cols = data.columns.get_level_values(0).unique()
 
             for col in cols:
-                mask = data.columns.get_level_values(0) == col
+                data_value = data.loc[:, data.columns.get_level_values(0) == col]
                 if keep_index is True:
-                    yield col, data.loc[:, mask]
+                    yield col, data_value
                 else:
-                    yield col, data.loc[:, mask].values
+                    yield col, data_value.values
 
     @property
     def nseries(self):
@@ -404,9 +404,9 @@ def _compute_plot_data(self):
             data_list = []
             for key, group in grouped:
                 columns = MultiIndex.from_product([[key], self.column])
-                group = group[self.column]
-                group.columns = columns
-                data_list.append(group)
+                sub_group = group[self.column]
+                sub_group.columns = columns
+                data_list.append(sub_group)
 
             data = concat(data_list, axis=1)
 
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 85e103970d40b..dc182536a3696 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -110,8 +110,7 @@ def _make_plot(self):
             self._add_legend_handle(artists[0], label, index=i)
 
     def _reformat_y(self, y):
-        """Internal function to reformat y given `by` is applied or not.
-        """
+        """Internal function to reformat y given `by` is applied or not."""
         if self.by is not None and len(y.shape) > 1:
             notna = [col[~isna(col)] for col in y.T]
             y = np.array(np.array(notna).T)
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index bebd915fec61d..4292295e08793 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -4,6 +4,7 @@
 
 from datetime import date, datetime
 import itertools
+import re
 import string
 import warnings
 
@@ -3300,13 +3301,17 @@ def test_hist_plot_invalid_layout_with_by(self):
         df["D"] = np.random.choice(["a", "b", "c"], 30)
 
         # layout too small for all 3 plots
-        with pytest.raises(ValueError):
+        msg = "larger than required size"
+        with pytest.raises(ValueError, match=msg):
             df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
 
         # invalid format for layout
-        with pytest.raises(ValueError):
+        msg = re.escape("Layout must be a tuple of (rows, columns)")
+        with pytest.raises(ValueError, match=msg):
             df.plot.hist(column=["A", "B"], by="C", layout=(1,))
-        with pytest.raises(ValueError):
+
+        msg = "At least one dimension of layout must be positive"
+        with pytest.raises(ValueError, match=msg):
             df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
 
     @pytest.mark.slow

From 7051432e30b3ced05a052ad7c55c8e3d7cfad9c2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 15 Jan 2020 09:46:50 +0100
Subject: [PATCH 056/142] fix linting

---
 pandas/plotting/_core.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index a65980221837b..c4861eafb60b7 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1182,9 +1182,7 @@ def hist(self, by=None, bins=10, **kwargs):
             >>> np.random.seed(159753)
             >>> df = pd.DataFrame(np.random.randn(30, 2), columns=['A', 'B'])
             >>> df['C'] = np.random.choice(['a', 'b', 'c'], 30)
-            >>> df['D'] = np.random.choice(['a', 'b', 'c'], 30)
             >>> ax = df.plot.hist(column=['A', 'B'], by=['C'], figsize=(8, 10))
-
         """
         return self(kind="hist", by=by, bins=bins, **kwargs)
 

From adbde9f77398e18a8e6bf3a44803e71eefa0d733 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 15 Jan 2020 09:50:02 +0100
Subject: [PATCH 057/142] update doc

---
 pandas/plotting/_core.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index c4861eafb60b7..9b4f69d380eea 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1176,6 +1176,8 @@ def hist(self, by=None, bins=10, **kwargs):
             >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
 
+        If `by` is defined, a grouped hist plot is generated:
+
         .. plot::
             :context: close-figs
 

From abd10f317eb78a5508ee3c66356b1d3283e3fc5d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 10 Feb 2020 18:07:31 +0100
Subject: [PATCH 058/142] code change based on reviews

---
 pandas/plotting/_matplotlib/core.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 881d6b171b25a..26e1eb35b8879 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -112,7 +112,7 @@ def __init__(
 
         self.data = data
         self.by = by
-        self.column = [column] if isinstance(column, str) else column
+        self.column = [column] if not isinstance(column, list) else column
 
         self.kind = kind
 
@@ -247,20 +247,21 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
             data = data.fillna(fillna)
 
         if self.by is None:
-            for col, values in data.items():
-                if keep_index:
-                    yield col, values
-                else:
-                    yield col, values.values
+            cols = data.columns
         else:
             cols = data.columns.get_level_values(0).unique()
 
-            for col in cols:
-                data_value = data.loc[:, data.columns.get_level_values(0) == col]
-                if keep_index is True:
-                    yield col, data_value
-                else:
-                    yield col, data_value.values
+        for col in cols:
+            if self.by is None:
+                values = data.loc[:, col]
+            else:
+                # if `by` is defined, select columns which are grouped by
+                values = data.loc[:, data.columns.get_level_values(0) == col]
+
+            if keep_index:
+                yield col, values
+            else:
+                yield col, values.values
 
     @property
     def nseries(self):

From c20d81a75d8242a71573b6e0153b723388351c79 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 10 Feb 2020 20:08:57 +0100
Subject: [PATCH 059/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 26e1eb35b8879..cfa501103abf8 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -247,21 +247,21 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
             data = data.fillna(fillna)
 
         if self.by is None:
-            cols = data.columns
+            for col, values in data.items():
+                self._yield_values(keep_index, col, values)
         else:
             cols = data.columns.get_level_values(0).unique()
 
-        for col in cols:
-            if self.by is None:
-                values = data.loc[:, col]
-            else:
-                # if `by` is defined, select columns which are grouped by
+            for col in cols:
                 values = data.loc[:, data.columns.get_level_values(0) == col]
+                self._yield_values(keep_index, col, values)
 
-            if keep_index:
-                yield col, values
-            else:
-                yield col, values.values
+    def _yield_values(self, keep_index, col, values):
+        """Yield col and values based on keep_index value."""
+        if keep_index is True:
+            yield col, values
+        else:
+            yield col, values.values
 
     @property
     def nseries(self):

From 07112c00b661eae885e69b068ae28e0b52304656 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 10 Feb 2020 20:10:53 +0100
Subject: [PATCH 060/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index cfa501103abf8..d6302ce91cf4d 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -248,20 +248,19 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
 
         if self.by is None:
             for col, values in data.items():
-                self._yield_values(keep_index, col, values)
+                if keep_index is True:
+                    yield col, values
+                else:
+                    yield col, values.values
         else:
             cols = data.columns.get_level_values(0).unique()
 
             for col in cols:
-                values = data.loc[:, data.columns.get_level_values(0) == col]
-                self._yield_values(keep_index, col, values)
-
-    def _yield_values(self, keep_index, col, values):
-        """Yield col and values based on keep_index value."""
-        if keep_index is True:
-            yield col, values
-        else:
-            yield col, values.values
+                data_values = data.loc[:, data.columns.get_level_values(0) == col]
+                if keep_index is True:
+                    yield col, data_values
+                else:
+                    yield col, data_values.values
 
     @property
     def nseries(self):

From fb0b87cc8702f461b4d4bdca037b9e92f4255e8a Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 11 Feb 2020 19:55:00 +0100
Subject: [PATCH 061/142] code change on reviews

---
 pandas/plotting/_matplotlib/hist.py |  6 ++-
 pandas/tests/plotting/test_frame.py | 67 +++++++++++------------------
 2 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index dc182536a3696..ad8c6a60de161 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -1,3 +1,4 @@
+from typing import Union
 import numpy as np
 
 from pandas.core.dtypes.common import is_integer, is_list_like
@@ -9,6 +10,7 @@
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
+from pandas.core.series import Series
 
 
 class HistPlot(LinePlot):
@@ -38,7 +40,7 @@ def _args_adjust(self):
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 
-    def _caculcate_bins(self, data):
+    def _caculcate_bins(self, data: ABCDataFrame) -> np.array:
         """Calculate bins given data"""
 
         values = data._convert(datetime=True)._get_numeric_data()
@@ -109,7 +111,7 @@ def _make_plot(self):
 
             self._add_legend_handle(artists[0], label, index=i)
 
-    def _reformat_y(self, y):
+    def _reformat_y(self, y: Union[Series, np.array]) -> Union[Series, np.array]:
         """Internal function to reformat y given `by` is applied or not."""
         if self.by is not None and len(y.shape) > 1:
             notna = [col[~isna(col)] for col in y.T]
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 4292295e08793..0eb96d3b21715 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -26,6 +26,15 @@
 import pandas.plotting as plotting
 
 
+@pytest.fixture(scope="module")
+def test_hist_df():
+    np.random.seed(0)
+    df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+    df["C"] = np.random.choice(["a", "b", "c"], 30)
+    df["D"] = np.random.choice(["a", "b", "c"], 30)
+    return df
+
+
 @td.skip_if_no_mpl
 class TestDataFramePlots(TestPlotBase):
     def setup_method(self, method):
@@ -3259,13 +3268,9 @@ def test_subplots_sharex_false(self):
 
     @pytest.mark.parametrize("by", ["C", ["C", "D"]])
     @pytest.mark.parametrize("column", ["A", ["A", "B"]])
-    def test_hist_plot_by_argument(self, by, column):
+    def test_hist_plot_by_argument(self, by, column, test_hist_df):
         # GH 15079
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-        df["C"] = np.random.choice(["a", "b", "c"], 30)
-        df["D"] = np.random.choice(["a", "b", "c"], 30)
-
-        _check_plot_works(df.plot.hist, column=column, by=by)
+        _check_plot_works(test_hist_df.plot.hist, column=column, by=by)
 
     @pytest.mark.slow
     @pytest.mark.parametrize(
@@ -3282,47 +3287,36 @@ def test_hist_plot_by_argument(self, by, column):
             (["C", "D"], ["A", "B"], (5, 2), 9),
         ],
     )
-    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
+    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, test_hist_df):
         # GH 15079
-        np.random.seed(0)
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-        df["C"] = np.random.choice(["a", "b", "c"], 30)
-        df["D"] = np.random.choice(["a", "b", "c"], 30)
-
+        # _check_plot_works adds an ax so catch warning. see GH #13188
         with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot.hist, column=column, by=by, layout=layout)
+            axes = _check_plot_works(
+                test_hist_df.plot.hist, column=column, by=by, layout=layout
+            )
         self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
 
-    def test_hist_plot_invalid_layout_with_by(self):
+    def test_hist_plot_invalid_layout_with_by_raises(self, test_hist_df):
         # GH 15079, test if error is raised when invalid layout is given
-        np.random.seed(0)
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-        df["C"] = np.random.choice(["a", "b", "c"], 30)
-        df["D"] = np.random.choice(["a", "b", "c"], 30)
 
         # layout too small for all 3 plots
         msg = "larger than required size"
         with pytest.raises(ValueError, match=msg):
-            df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
+            test_hist_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
 
         # invalid format for layout
         msg = re.escape("Layout must be a tuple of (rows, columns)")
         with pytest.raises(ValueError, match=msg):
-            df.plot.hist(column=["A", "B"], by="C", layout=(1,))
+            test_hist_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
 
         msg = "At least one dimension of layout must be positive"
         with pytest.raises(ValueError, match=msg):
-            df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
+            test_hist_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
 
     @pytest.mark.slow
-    def test_axis_share_x_with_by(self):
+    def test_axis_share_x_with_by(self, test_hist_df):
         # GH 15079
-        np.random.seed(0)
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-        df["C"] = np.random.choice(["a", "b", "c"], 30)
-        df["D"] = np.random.choice(["a", "b", "c"], 30)
-
-        ax1, ax2, ax3 = df.plot.hist(column="A", by="C", sharex=True)
+        ax1, ax2, ax3 = test_hist_df.plot.hist(column="A", by="C", sharex=True)
 
         # share x
         assert ax1._shared_x_axes.joined(ax1, ax2)
@@ -3337,14 +3331,9 @@ def test_axis_share_x_with_by(self):
         assert not ax3._shared_y_axes.joined(ax2, ax3)
 
     @pytest.mark.slow
-    def test_axis_share_y_with_by(self):
+    def test_axis_share_y_with_by(self, test_hist_df):
         # GH 15079
-        np.random.seed(0)
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-        df["C"] = np.random.choice(["a", "b", "c"], 30)
-        df["D"] = np.random.choice(["a", "b", "c"], 30)
-
-        ax1, ax2, ax3 = df.plot.hist(column="A", by="C", sharey=True)
+        ax1, ax2, ax3 = test_hist_df.plot.hist(column="A", by="C", sharey=True)
 
         # share y
         assert ax1._shared_y_axes.joined(ax1, ax2)
@@ -3359,13 +3348,9 @@ def test_axis_share_y_with_by(self):
         assert not ax3._shared_x_axes.joined(ax2, ax3)
 
     @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
-    def test_figure_shape_hist_with_by(self, figsize):
+    def test_figure_shape_hist_with_by(self, figsize, test_hist_df):
         # GH 15079
-        df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-        df["C"] = np.random.choice(["a", "b", "c"], 30)
-        df["D"] = np.random.choice(["a", "b", "c"], 30)
-
-        axes = df.plot.hist(column="A", by="C", figsize=figsize)
+        axes = test_hist_df.plot.hist(column="A", by="C", figsize=figsize)
         self._check_axes_shape(axes, axes_num=3, figsize=figsize)
 
     def test_plot_no_rows(self):

From a6a8e579aa2cd89252f32435ea2d0507dfc5aefc Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 11 Feb 2020 20:25:56 +0100
Subject: [PATCH 062/142] fix isort

---
 pandas/plotting/_matplotlib/hist.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index ad8c6a60de161..338cde64d9082 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -1,4 +1,5 @@
 from typing import Union
+
 import numpy as np
 
 from pandas.core.dtypes.common import is_integer, is_list_like
@@ -6,11 +7,11 @@
 from pandas.core.dtypes.missing import isna, remove_na_arraylike
 
 import pandas.core.common as com
+from pandas.core.series import Series
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
-from pandas.core.series import Series
 
 
 class HistPlot(LinePlot):

From 7f77f485d3076bb937aaf32a1b7c06e0cf32aba6 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Wed, 12 Feb 2020 08:36:20 +0100
Subject: [PATCH 063/142] short code

---
 pandas/plotting/_matplotlib/core.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index d6302ce91cf4d..68f81edd889e5 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -247,20 +247,18 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
             data = data.fillna(fillna)
 
         if self.by is None:
-            for col, values in data.items():
-                if keep_index is True:
-                    yield col, values
-                else:
-                    yield col, values.values
+            for col, val in data.items():
+                if not keep_index:
+                    val = val.values
+                yield col, val
         else:
             cols = data.columns.get_level_values(0).unique()
 
             for col in cols:
-                data_values = data.loc[:, data.columns.get_level_values(0) == col]
-                if keep_index is True:
-                    yield col, data_values
-                else:
-                    yield col, data_values.values
+                val = data.loc[:, data.columns.get_level_values(0) == col]
+                if not keep_index:
+                    val = val.values
+                yield col, val
 
     @property
     def nseries(self):

From a120d27ff5bb90e991dfcec50f008aabe15a1583 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 23 Feb 2020 09:53:21 +0100
Subject: [PATCH 064/142] simpler python

---
 pandas/plotting/_matplotlib/core.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 79afb3abd6e24..c8d167dddd90b 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -246,19 +246,18 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
         if fillna is not None:
             data = data.fillna(fillna)
 
-        if self.by is None:
-            for col, val in data.items():
-                if not keep_index:
-                    val = val.values
-                yield col, val
-        else:
-            cols = data.columns.get_level_values(0).unique()
-
-            for col in cols:
-                val = data.loc[:, data.columns.get_level_values(0) == col]
-                if not keep_index:
-                    val = val.values
-                yield col, val
+        iter_data = data
+        if self.by is not None:
+            cols = data.columns.levels[0]
+            iter_data = {
+                col: data.loc[:, data.columns.get_level_values(0) == col]
+                for col in cols
+            }
+
+        for col, val in iter_data.items():
+            if not keep_index:
+                val = val.values
+            yield col, val
 
     @property
     def nseries(self):

From f87afee50716c5b24dc2f2cad074d5638ae5172e Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 23 Feb 2020 09:55:05 +0100
Subject: [PATCH 065/142] add inline comment

---
 pandas/plotting/_matplotlib/core.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index c8d167dddd90b..6bb651f7300eb 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -248,6 +248,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
 
         iter_data = data
         if self.by is not None:
+            # select sub-columns based on the value of first level of MI
             cols = data.columns.levels[0]
             iter_data = {
                 col: data.loc[:, data.columns.get_level_values(0) == col]

From 82711ee082d08e2d862338ae4403cbfaa4a35d0f Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 23 Feb 2020 09:57:34 +0100
Subject: [PATCH 066/142] simpler pandas

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 6bb651f7300eb..8bf39398f889a 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -267,7 +267,7 @@ def nseries(self):
         elif self.by is None:
             return self.data.shape[1]
         else:
-            return len(set(self.data.columns.get_level_values(0)))
+            return len(self.data.columns.levels[0])
 
     def draw(self):
         self.plt.draw_if_interactive()

From 60f729811232019f25bf6a4bbf2bb56f043f0532 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 23 Feb 2020 16:58:46 +0100
Subject: [PATCH 067/142] code change on JR review

---
 doc/source/whatsnew/v1.1.0.rst      |  2 +-
 pandas/plotting/_core.py            | 11 ++++++++++-
 pandas/plotting/_matplotlib/core.py |  3 ++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 0ce622fdff98a..8f61342c05cd8 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -43,6 +43,7 @@ Other enhancements
 
 - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
 - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
+- Implement ``by`` argument for :meth:`DataFrame.plot.hist` (:issue:`15079`)
 -
 -
 
@@ -203,7 +204,6 @@ Plotting
 ^^^^^^^^
 
 -
-- Implement ``by`` argument for :meth:`DataFrame.plot.hist` (:issue:`15079`)
 - :func:`.plot` for line/bar now accepts color by dictonary (:issue:`8193`).
 -
 - Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` lost color attributes of ``medianprops``, ``whiskerprops``, ``capprops`` and ``medianprops`` (:issue:`30346`)
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index f914ae6bc0649..4b87a1e624583 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1177,6 +1177,9 @@ def hist(self, by=None, bins=10, **kwargs):
         ----------
         by : str or sequence, optional
             Column in the DataFrame to group by.
+
+        .. versionadded:: 1.1.0
+
         bins : int, default 10
             Number of histogram bins to be used.
         **kwargs
@@ -1209,7 +1212,8 @@ def hist(self, by=None, bins=10, **kwargs):
             >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
 
-        If `by` is defined, a grouped hist plot is generated:
+        If `by` can be assigned by the DataFrame column names, or a list of column
+        names for which to group, and a grouped hist plot is generated:
 
         .. plot::
             :context: close-figs
@@ -1218,6 +1222,11 @@ def hist(self, by=None, bins=10, **kwargs):
             >>> df = pd.DataFrame(np.random.randn(30, 2), columns=['A', 'B'])
             >>> df['C'] = np.random.choice(['a', 'b', 'c'], 30)
             >>> ax = df.plot.hist(column=['A', 'B'], by=['C'], figsize=(8, 10))
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.hist(column=['A', 'B'], by='C', figsize=(8, 10))
         """
         return self(kind="hist", by=by, bins=bins, **kwargs)
 
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 8bf39398f889a..77921f9906636 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -267,7 +267,7 @@ def nseries(self):
         elif self.by is None:
             return self.data.shape[1]
         else:
-            return len(self.data.columns.levels[0])
+            return self._grouped_data_size
 
     def draw(self):
         self.plt.draw_if_interactive()
@@ -398,6 +398,7 @@ def _compute_plot_data(self):
         if self.by is not None:
             self.subplots = True
             grouped = data.groupby(self.by)
+            self._grouped_data_size = len(grouped)
 
             data_list = []
             for key, group in grouped:

From 071488b5b2245e923dc17b2b17e8defeeda212d7 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 23 Feb 2020 17:17:50 +0100
Subject: [PATCH 068/142] fix linting

---
 pandas/plotting/_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 4b87a1e624583..93b3c7ec8473d 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1178,7 +1178,7 @@ def hist(self, by=None, bins=10, **kwargs):
         by : str or sequence, optional
             Column in the DataFrame to group by.
 
-        .. versionadded:: 1.1.0
+            .. versionadded:: 1.1.0
 
         bins : int, default 10
             Number of histogram bins to be used.

From 867094a72735d1b9040f740c115935e51095cc97 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 8 Mar 2020 17:24:07 +0100
Subject: [PATCH 069/142] code change on reviews

---
 pandas/plotting/_matplotlib/core.py | 40 +++++++++++++++++++----------
 pandas/plotting/_matplotlib/hist.py |  4 +--
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 77921f9906636..5b25e4e9d1aff 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -112,7 +112,7 @@ def __init__(
 
         self.data = data
         self.by = by
-        self.column = [column] if not isinstance(column, list) else column
+        self.columns = [column] if not isinstance(column, list) else column
 
         self.kind = kind
 
@@ -385,6 +385,28 @@ def result(self):
             else:
                 return self.axes[0]
 
+    def _transform_grouped_data(self, data: ABCDataFrame) -> ABCDataFrame:
+        """
+        Internal function to transform grouped DataFrame object to a normal
+        DataFrame to facilitate further manipulation.
+
+        The input is the original DataFrame to plot, and output is the reconstructed
+        DataFrame with MultiIndex columns. The first level of MI is unique values of
+        groups, and second level of MI is the columns selected by users.
+        """
+        grouped = data.groupby(self.by)
+        self._grouped_data_size = len(grouped)
+
+        data_list = []
+        for key, group in grouped:
+            columns = MultiIndex.from_product([[key], self.columns])
+            sub_group = group[self.columns]
+            sub_group.columns = columns
+            data_list.append(sub_group)
+
+        data = concat(data_list, axis=1)
+        return data
+
     def _compute_plot_data(self):
         data = self.data
 
@@ -394,20 +416,12 @@ def _compute_plot_data(self):
                 label = "None"
             data = data.to_frame(name=label)
 
-        # GH15079 restructure data if by is defined
+        # GH15079 reconstruct data if by is defined
         if self.by is not None:
-            self.subplots = True
-            grouped = data.groupby(self.by)
-            self._grouped_data_size = len(grouped)
 
-            data_list = []
-            for key, group in grouped:
-                columns = MultiIndex.from_product([[key], self.column])
-                sub_group = group[self.column]
-                sub_group.columns = columns
-                data_list.append(sub_group)
-
-            data = concat(data_list, axis=1)
+            # Set subplots to True if self.by is defined
+            self.subplots = True
+            data = self._transform_grouped_data(data)
 
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 338cde64d9082..8960edeab58b3 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -32,7 +32,7 @@ def _args_adjust(self):
                 self.bins = self._caculcate_bins(self.data)
 
             else:
-                grouped = self.data.groupby(self.by)[self.column]
+                grouped = self.data.groupby(self.by)[self.columns]
                 bins_list = []
                 for key, group in grouped:
                     bins_list.append(self._caculcate_bins(group))
@@ -100,7 +100,7 @@ def _make_plot(self):
             # when by is applied, label should be columns that are grouped
             if self.by is not None:
                 kwds["bins"] = kwds["bins"][i]
-                kwds["label"] = self.column
+                kwds["label"] = self.columns
                 kwds.pop("color")
 
             y = self._reformat_y(y)

From b0f06b2ac0b499e3050b94568724682a17268f0e Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 8 Mar 2020 17:34:35 +0100
Subject: [PATCH 070/142] Add docstring

---
 pandas/plotting/_matplotlib/core.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 5b25e4e9d1aff..539a264e74dd3 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -241,6 +241,29 @@ def _validate_color_args(self):
                         )
 
     def _iter_data(self, data=None, keep_index=False, fillna=None):
+        """
+        Iterate data to yield inputs for plotting methods.
+
+        When self.by is not defined, iter_data is served as a DataFrame, and column
+        name and Series or values of Series are yielded.
+
+        When self.by is defined, since values of multiple columns might have to be
+        yielded at the same time to visualize multiple plots, `df.items()` cannot
+        achieve it, so here to convert iter_data to dictionaries to provide input
+        for plot methods, and column name and DataFrame or values of DataFrame are
+        yielded.
+
+        Parameters
+        ----------
+        data: DataFrame
+        keep_index: bool, if to keep original index or not
+        fillna: values used to fill NAs, default is None
+
+        Returns
+        -------
+        If self.by is None, return column name and Series/values of Series; If self.by
+        is not None, return column name and DataFrame/values of DataFrame.
+        """
         if data is None:
             data = self.data
         if fillna is not None:

From 111e89c008c97bf3de8e2dee5d820f6aa23b8311 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Mar 2020 16:54:15 +0100
Subject: [PATCH 071/142] fix typo

---
 pandas/plotting/_matplotlib/hist.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 8960edeab58b3..f7ff19ae783a5 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -29,19 +29,19 @@ def _args_adjust(self):
         # where subplots are created based on by argument
         if is_integer(self.bins):
             if self.by is None:
-                self.bins = self._caculcate_bins(self.data)
+                self.bins = self._calculate_bins(self.data)
 
             else:
                 grouped = self.data.groupby(self.by)[self.columns]
                 bins_list = []
                 for key, group in grouped:
-                    bins_list.append(self._caculcate_bins(group))
+                    bins_list.append(self._calculate_bins(group))
                 self.bins = bins_list
 
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 
-    def _caculcate_bins(self, data: ABCDataFrame) -> np.array:
+    def _calculate_bins(self, data: ABCDataFrame) -> np.array:
         """Calculate bins given data"""
 
         values = data._convert(datetime=True)._get_numeric_data()

From 83ec86809de41a489533795c416eb7b5ac5af40c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Mar 2020 16:58:22 +0100
Subject: [PATCH 072/142] remove blank

---
 doc/source/whatsnew/v1.1.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 3b01676e6ec53..1c107ecd422dc 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -346,7 +346,6 @@ I/O
 Plotting
 ^^^^^^^^
 
--
 - :func:`.plot` for line/bar now accepts color by dictonary (:issue:`8193`).
 -
 - Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` lost color attributes of ``medianprops``, ``whiskerprops``, ``capprops`` and ``medianprops`` (:issue:`30346`)

From d6c8566c1d1a7ce9b8c711decb99a25f2942e011 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Mar 2020 17:15:32 +0100
Subject: [PATCH 073/142] use more meaningful example

---
 pandas/plotting/_core.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 93b3c7ec8473d..261d093ae2026 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1212,21 +1212,15 @@ def hist(self, by=None, bins=10, **kwargs):
             >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
 
-        If `by` can be assigned by the DataFrame column names, or a list of column
-        names for which to group, and a grouped hist plot is generated:
+        A grouped histogram can be generated by providing the parameter `by` (which
+        can be a column name, or a list of column names):
 
         .. plot::
             :context: close-figs
 
-            >>> np.random.seed(159753)
-            >>> df = pd.DataFrame(np.random.randn(30, 2), columns=['A', 'B'])
-            >>> df['C'] = np.random.choice(['a', 'b', 'c'], 30)
-            >>> ax = df.plot.hist(column=['A', 'B'], by=['C'], figsize=(8, 10))
-
-        .. plot::
-            :context: close-figs
-
-            >>> ax = df.plot.hist(column=['A', 'B'], by='C', figsize=(8, 10))
+            >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
+            >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
+            >>> ax = df.plot.hist(column=["age"], by="gender")
         """
         return self(kind="hist", by=by, bins=bins, **kwargs)
 

From 6a0ac8dd588038a8f346020ed091e6df28b509e4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Mar 2020 17:17:44 +0100
Subject: [PATCH 074/142] keep as is

---
 pandas/plotting/_matplotlib/core.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 539a264e74dd3..9920db7538ae3 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -278,10 +278,11 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
                 for col in cols
             }
 
-        for col, val in iter_data.items():
-            if not keep_index:
-                val = val.values
-            yield col, val
+        for col, values in iter_data.items():
+            if keep_index is True:
+                yield col, values
+            else:
+                yield col, values.values
 
     @property
     def nseries(self):

From 49d0791adc6be7cdb61c956b35dfbd0e29934006 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Mar 2020 17:19:14 +0100
Subject: [PATCH 075/142] remove less useful comment

---
 pandas/plotting/_matplotlib/core.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 9920db7538ae3..6fdaa691abe80 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -442,8 +442,6 @@ def _compute_plot_data(self):
 
         # GH15079 reconstruct data if by is defined
         if self.by is not None:
-
-            # Set subplots to True if self.by is defined
             self.subplots = True
             data = self._transform_grouped_data(data)
 

From 2bfbe78149008cd509703cf4f0fa565a6f9559f3 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sun, 15 Mar 2020 17:23:57 +0100
Subject: [PATCH 076/142] change figsize

---
 pandas/plotting/_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 261d093ae2026..29935b6dcda34 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1220,7 +1220,7 @@ def hist(self, by=None, bins=10, **kwargs):
 
             >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
             >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
-            >>> ax = df.plot.hist(column=["age"], by="gender")
+            >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
         """
         return self(kind="hist", by=by, bins=bins, **kwargs)
 

From c5d75189945f1c4efa974987d45ccbf148668435 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Apr 2020 11:53:08 +0200
Subject: [PATCH 077/142] clean iter_data

---
 pandas/plotting/_matplotlib/core.py | 11 +----------
 pandas/plotting/_matplotlib/hist.py | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 6fdaa691abe80..173fdbec42029 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -269,16 +269,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None):
         if fillna is not None:
             data = data.fillna(fillna)
 
-        iter_data = data
-        if self.by is not None:
-            # select sub-columns based on the value of first level of MI
-            cols = data.columns.levels[0]
-            iter_data = {
-                col: data.loc[:, data.columns.get_level_values(0) == col]
-                for col in cols
-            }
-
-        for col, values in iter_data.items():
+        for col, values in data.items():
             if keep_index is True:
                 yield col, values
             else:
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index f7ff19ae783a5..e35f54d65f570 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -78,11 +78,24 @@ def _plot(
         cls._update_stacker(ax, stacking_id, n)
         return patches
 
+    def _create_iter_data(self):
+        """Create data for iteration if `by` is assigned"""
+        data = self.data
+        if self.by is not None:
+            # select sub-columns based on the value of first level of MI
+            cols = data.columns.levels[0]
+            iter_data = {
+                col: data.loc[:, data.columns.get_level_values(0) == col]
+                for col in cols
+            }
+        return iter_data
+
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
+        data = self._create_iter_data()
 
-        for i, (label, y) in enumerate(self._iter_data()):
+        for i, (label, y) in enumerate(self._iter_data(data=data)):
             ax = self._get_ax(i)
 
             kwds = self.kwds.copy()

From 03356cea0796e18953239e6ac796cb91a332930d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Apr 2020 11:55:14 +0200
Subject: [PATCH 078/142] remove unused docs

---
 pandas/plotting/_matplotlib/core.py | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 173fdbec42029..6866770dbcd84 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -241,29 +241,6 @@ def _validate_color_args(self):
                         )
 
     def _iter_data(self, data=None, keep_index=False, fillna=None):
-        """
-        Iterate data to yield inputs for plotting methods.
-
-        When self.by is not defined, iter_data is served as a DataFrame, and column
-        name and Series or values of Series are yielded.
-
-        When self.by is defined, since values of multiple columns might have to be
-        yielded at the same time to visualize multiple plots, `df.items()` cannot
-        achieve it, so here to convert iter_data to dictionaries to provide input
-        for plot methods, and column name and DataFrame or values of DataFrame are
-        yielded.
-
-        Parameters
-        ----------
-        data: DataFrame
-        keep_index: bool, if to keep original index or not
-        fillna: values used to fill NAs, default is None
-
-        Returns
-        -------
-        If self.by is None, return column name and Series/values of Series; If self.by
-        is not None, return column name and DataFrame/values of DataFrame.
-        """
         if data is None:
             data = self.data
         if fillna is not None:

From 7abc47df909a7d68f717a7de6d4bdb8bae15b9d2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Apr 2020 12:01:46 +0200
Subject: [PATCH 079/142] cleaner pandas

---
 pandas/plotting/_matplotlib/core.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 6866770dbcd84..eaac0c812edb9 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -25,6 +25,7 @@
 
 from pandas import MultiIndex
 import pandas.core.common as com
+from pandas.core.groupby.generic import DataFrameGroupBy
 from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.printing import pprint_thing
@@ -377,7 +378,7 @@ def result(self):
             else:
                 return self.axes[0]
 
-    def _transform_grouped_data(self, data: ABCDataFrame) -> ABCDataFrame:
+    def _reformat_grouped_data(self, grouped: DataFrameGroupBy) -> ABCDataFrame:
         """
         Internal function to transform grouped DataFrame object to a normal
         DataFrame to facilitate further manipulation.
@@ -386,7 +387,6 @@ def _transform_grouped_data(self, data: ABCDataFrame) -> ABCDataFrame:
         DataFrame with MultiIndex columns. The first level of MI is unique values of
         groups, and second level of MI is the columns selected by users.
         """
-        grouped = data.groupby(self.by)
         self._grouped_data_size = len(grouped)
 
         data_list = []
@@ -411,7 +411,8 @@ def _compute_plot_data(self):
         # GH15079 reconstruct data if by is defined
         if self.by is not None:
             self.subplots = True
-            data = self._transform_grouped_data(data)
+            grouped_data = data.groupby(self.by)
+            data = self._transform_grouped_data(grouped_data)
 
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``

From db832b413dbc9e4e4c165df75876813930f5a0cb Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Apr 2020 12:04:13 +0200
Subject: [PATCH 080/142] cleaner

---
 pandas/plotting/_matplotlib/core.py | 15 +++++++++++++++
 pandas/plotting/_matplotlib/hist.py | 12 ------------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index eaac0c812edb9..ec53fbb709354 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -241,6 +241,21 @@ def _validate_color_args(self):
                             "pass 'style' without a color symbol"
                         )
 
+    def _create_iter_data(self):
+        """
+        Create data for iteration if `by` is assigned, and it is used in both
+        hist and boxplot.
+        """
+        data = self.data
+        if self.by is not None:
+            # select sub-columns based on the value of first level of MI
+            cols = data.columns.levels[0]
+            iter_data = {
+                col: data.loc[:, data.columns.get_level_values(0) == col]
+                for col in cols
+            }
+        return iter_data
+
     def _iter_data(self, data=None, keep_index=False, fillna=None):
         if data is None:
             data = self.data
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index e35f54d65f570..c5e6324fcc462 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -78,18 +78,6 @@ def _plot(
         cls._update_stacker(ax, stacking_id, n)
         return patches
 
-    def _create_iter_data(self):
-        """Create data for iteration if `by` is assigned"""
-        data = self.data
-        if self.by is not None:
-            # select sub-columns based on the value of first level of MI
-            cols = data.columns.levels[0]
-            iter_data = {
-                col: data.loc[:, data.columns.get_level_values(0) == col]
-                for col in cols
-            }
-        return iter_data
-
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()

From 9ae59871f4366ab2ea5f3c79202ad24ac274d387 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Apr 2020 12:19:47 +0200
Subject: [PATCH 081/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index e81bcddcf6488..8862276161b5d 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -246,12 +246,12 @@ def _create_iter_data(self):
         Create data for iteration if `by` is assigned, and it is used in both
         hist and boxplot.
         """
-        data = self.data
+        iter_data = self.data
         if self.by is not None:
             # select sub-columns based on the value of first level of MI
-            cols = data.columns.levels[0]
+            cols = self.data.columns.levels[0]
             iter_data = {
-                col: data.loc[:, data.columns.get_level_values(0) == col]
+                col: self.data.loc[:, self.data.columns.get_level_values(0) == col]
                 for col in cols
             }
         return iter_data

From 10c2ad11b239ce312530435a210aaacdfd87c03b Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 4 Apr 2020 12:34:23 +0200
Subject: [PATCH 082/142] rename

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 8862276161b5d..89c265ff2e844 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -427,7 +427,7 @@ def _compute_plot_data(self):
         if self.by is not None:
             self.subplots = True
             grouped_data = data.groupby(self.by)
-            data = self._transform_grouped_data(grouped_data)
+            data = self._reformat_grouped_data(grouped_data)
 
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``

From ce8cfd4551b0ed4ac3b150681401933bf9fd7f93 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 7 Apr 2020 22:45:16 +0200
Subject: [PATCH 083/142] code change on reviews

---
 pandas/plotting/_matplotlib/core.py | 101 +++++++++++++++++++++-------
 pandas/plotting/_matplotlib/hist.py |   2 +-
 pandas/tests/plotting/test_frame.py |   4 +-
 3 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 89c265ff2e844..1726473bef34e 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1,5 +1,5 @@
 import re
-from typing import Optional
+from typing import Dict, Optional, Union
 import warnings
 
 import numpy as np
@@ -23,9 +23,8 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas import MultiIndex
+from pandas import DataFrame, MultiIndex
 import pandas.core.common as com
-from pandas.core.groupby.generic import DataFrameGroupBy
 from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.printing import pprint_thing
@@ -112,8 +111,17 @@ def __init__(
         import matplotlib.pyplot as plt
 
         self.data = data
-        self.by = by
-        self.columns = [column] if not isinstance(column, list) else column
+        self.by = [by] if not isinstance(by, list) or by is None else by
+
+        if self.by:
+            self._grouped_data_size = len(data.groupby(self.by))
+
+        # Assign the rest of columns into self.columns if by is explicitly defined
+        # while column is not, so as to keep the same behaviour with current df.hist
+        if self.by and column is None:
+            self.columns = [col for col in data.columns if col not in self.by]
+        else:
+            self.columns = [column] if not isinstance(column, list) else column
 
         self.kind = kind
 
@@ -241,17 +249,48 @@ def _validate_color_args(self):
                             "pass 'style' without a color symbol"
                         )
 
-    def _create_iter_data(self):
+    @staticmethod
+    def _create_iter_data_given_by(
+        data: ABCDataFrame, by: Optional[list]
+    ) -> Union[ABCDataFrame, Dict[str, Union[ABCDataFrame, ABCSeries]]]:
         """
-        Create data for iteration if `by` is assigned, and it is used in both
-        hist and boxplot.
+        Create data for iteration given `by` is assigned or not, and it is only
+        used in both hist and boxplot.
+
+        If `by` is assigned, return a dictionary of DataFrames in which the key of
+        dictionary is the values in groups.
+        If `by` is not assigned, return input as is, and this preserves current
+        status of iter_data.
+
+        Parameters
+        ----------
+        data: reformatted grouped data from `_compute_plot_data` method
+        by: list or None, value assigned to `by`.
+
+        Returns
+        -------
+        iter_data: DataFrame or Dictionary of DataFrames
+
+        Examples
+        --------
+        If `by` is assigned:
+
+        >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')]
+        >>> mi = MultiIndex.from_tuples(tuples)
+        >>> value = [[1, 3, np.nan, np.nan],
+        ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
+        >>> data = DataFrame(value, columns=mi)
+        >>> _create_iter_data_given_by(data, by=["col1"]) # doctest: +SKIP
+        {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
+         'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
         """
-        iter_data = self.data
-        if self.by is not None:
-            # select sub-columns based on the value of first level of MI
-            cols = self.data.columns.levels[0]
+        if not by:
+            iter_data = data
+        else:
+            # Select sub-columns based on the value of first level of MI
+            cols = data.columns.levels[0]
             iter_data = {
-                col: self.data.loc[:, self.data.columns.get_level_values(0) == col]
+                col: data.loc[:, data.columns.get_level_values(0) == col]
                 for col in cols
             }
         return iter_data
@@ -393,16 +432,33 @@ def result(self):
             else:
                 return self.axes[0]
 
-    def _reformat_grouped_data(self, grouped: DataFrameGroupBy) -> ABCDataFrame:
+    def _reconstruct_data_with_by(self, data: ABCDataFrame) -> ABCDataFrame:
         """
-        Internal function to transform grouped DataFrame object to a normal
-        DataFrame to facilitate further manipulation.
-
-        The input is the original DataFrame to plot, and output is the reconstructed
-        DataFrame with MultiIndex columns. The first level of MI is unique values of
-        groups, and second level of MI is the columns selected by users.
+        Internal function to group data, and reassign multiindex column names onto the
+        result in order to let grouped data be used in _compute_plot_data method.
+
+        Parameters
+        ----------
+        data: Original DataFrame to plot
+
+        Returns
+        -------
+        Output is the reconstructed DataFrame with MultiIndex columns. The first level
+        of MI is unique values of groups, and second level of MI is the columns
+        selected by users.
+
+        Examples
+        --------
+        >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]}
+        >>> df = DataFrame(d)
+        >>> _reconstruct_data_with_by(df) # doctest: +SKIP
+           h1      h2
+           a   b   a   b
+        0  1   3   NaN NaN
+        1  3   4   NaN NaN
+        2  NaN NaN 5   6
         """
-        self._grouped_data_size = len(grouped)
+        grouped = data.groupby(self.by)
 
         data_list = []
         for key, group in grouped:
@@ -426,8 +482,7 @@ def _compute_plot_data(self):
         # GH15079 reconstruct data if by is defined
         if self.by is not None:
             self.subplots = True
-            grouped_data = data.groupby(self.by)
-            data = self._reformat_grouped_data(grouped_data)
+            data = self._reconstruct_data_with_by(self.data)
 
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index c5e6324fcc462..4259ca19014a8 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -81,7 +81,7 @@ def _plot(
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
-        data = self._create_iter_data()
+        data = self._create_iter_data_given_by(self.data, self.by)
 
         for i, (label, y) in enumerate(self._iter_data(data=data)):
             ax = self._get_ax(i)
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 64ffd4cce0500..85ce3c5b63818 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -3275,7 +3275,7 @@ def test_subplots_sharex_false(self):
         tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
 
     @pytest.mark.parametrize("by", ["C", ["C", "D"]])
-    @pytest.mark.parametrize("column", ["A", ["A", "B"]])
+    @pytest.mark.parametrize("column", ["A", ["A", "B"], None])
     def test_hist_plot_by_argument(self, by, column, test_hist_df):
         # GH 15079
         _check_plot_works(test_hist_df.plot.hist, column=column, by=by)
@@ -3287,11 +3287,13 @@ def test_hist_plot_by_argument(self, by, column, test_hist_df):
             (["C"], "A", (2, 2), 3),
             ("C", "A", (2, 2), 3),
             (["C"], ["A"], (1, 3), 3),
+            ("C", None, (3, 1), 3),
             ("C", ["A", "B"], (3, 1), 3),
             (["C", "D"], "A", (9, 1), 9),
             (["C", "D"], "A", (3, 3), 9),
             (["C", "D"], ["A"], (5, 2), 9),
             (["C", "D"], ["A", "B"], (9, 1), 9),
+            (["C", "D"], None, (9, 1), 9),
             (["C", "D"], ["A", "B"], (5, 2), 9),
         ],
     )

From 627cc02e1770d10d270d21a4e39f08fab51c2ce2 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 7 Apr 2020 23:19:27 +0200
Subject: [PATCH 084/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 1726473bef34e..923b3150764ce 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -23,7 +23,7 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas import DataFrame, MultiIndex
+from pandas import MultiIndex
 import pandas.core.common as com
 from pandas.core.reshape.concat import concat
 
@@ -111,7 +111,7 @@ def __init__(
         import matplotlib.pyplot as plt
 
         self.data = data
-        self.by = [by] if not isinstance(by, list) or by is None else by
+        self.by = [by] if not isinstance(by, list) and by is not None else by
 
         if self.by:
             self._grouped_data_size = len(data.groupby(self.by))
@@ -279,7 +279,7 @@ def _create_iter_data_given_by(
         >>> mi = MultiIndex.from_tuples(tuples)
         >>> value = [[1, 3, np.nan, np.nan],
         ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
-        >>> data = DataFrame(value, columns=mi)
+        >>> data = DataFrame(value, columns=mi) # doctest: +SKIP
         >>> _create_iter_data_given_by(data, by=["col1"]) # doctest: +SKIP
         {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
          'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}

From ee8972d0cb753640877fb8e192364f3795631c72 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 10 Apr 2020 20:02:33 +0200
Subject: [PATCH 085/142] linting

---
 pandas/plotting/_matplotlib/hist.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 56104f64a8c8b..829b69ad99bdb 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -49,9 +49,7 @@ def _calculate_bins(self, data: ABCDataFrame) -> np.array:
         values = values[~isna(values)]
 
         hist, bins = np.histogram(
-            values,
-            bins=self.bins,
-            range=self.kwds.get("range", None)
+            values, bins=self.bins, range=self.kwds.get("range", None)
         )
         return bins
 

From 0839be219f81cc5537ca18f4bb7be0f9960c483c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 1 May 2020 21:12:42 +0200
Subject: [PATCH 086/142] annotation

---
 pandas/plotting/_matplotlib/core.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 78fd598ee8e1d..d088e746d6cfc 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 
+from pandas._typing import Label
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
 
@@ -23,7 +24,7 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas import MultiIndex
+from pandas import DataFrame, MultiIndex, Series
 import pandas.core.common as com
 from pandas.core.reshape.concat import concat
 
@@ -104,7 +105,7 @@ def __init__(
         table=False,
         layout=None,
         include_bool=False,
-        column=None,
+        column: Optional[Label] = None,
         **kwds,
     ):
 
@@ -251,8 +252,8 @@ def _validate_color_args(self):
 
     @staticmethod
     def _create_iter_data_given_by(
-        data: ABCDataFrame, by: Optional[list]
-    ) -> Union[ABCDataFrame, Dict[str, Union[ABCDataFrame, ABCSeries]]]:
+        data: DataFrame, by: Optional[list]
+    ) -> Union[DataFrame, Dict[str, Union[DataFrame, Series]]]:
         """
         Create data for iteration given `by` is assigned or not, and it is only
         used in both hist and boxplot.
@@ -432,7 +433,7 @@ def result(self):
             else:
                 return self.axes[0]
 
-    def _reconstruct_data_with_by(self, data: ABCDataFrame) -> ABCDataFrame:
+    def _reconstruct_data_with_by(self, data: DataFrame) -> DataFrame:
         """
         Internal function to group data, and reassign multiindex column names onto the
         result in order to let grouped data be used in _compute_plot_data method.

From 142ee532f8cad03d9206d61ce6e58f6830d13a7d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 1 May 2020 21:34:49 +0200
Subject: [PATCH 087/142] annotation

---
 pandas/plotting/_matplotlib/hist.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 829b69ad99bdb..1970bb570fced 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -8,6 +8,7 @@
 
 import pandas.core.common as com
 from pandas.core.series import Series
+from pandas.core.frame import DataFrame
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
@@ -41,7 +42,7 @@ def _args_adjust(self):
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 
-    def _calculate_bins(self, data: ABCDataFrame) -> np.array:
+    def _calculate_bins(self, data: DataFrame) -> np.array:
         """Calculate bins given data"""
 
         values = data._convert(datetime=True)._get_numeric_data()

From 2710cf20594fc7e9f5cef79582761465ea36a6ee Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 5 May 2020 16:11:07 +0200
Subject: [PATCH 088/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 84d9ec206c5e2..60879ae9a3f76 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1,5 +1,5 @@
 import re
-from typing import Dict, Optional, Union
+from typing import Dict, List, Optional, Union
 import warnings
 
 import numpy as np
@@ -168,8 +168,8 @@ def __init__(
 
         self.grid = grid
         self.legend = legend
-        self.legend_handles = []
-        self.legend_labels = []
+        self.legend_handles: List = []
+        self.legend_labels: List = []
 
         for attr in self._pop_attributes:
             value = kwds.pop(attr, self._attr_defaults.get(attr, None))
@@ -251,7 +251,7 @@ def _validate_color_args(self):
 
     @staticmethod
     def _create_iter_data_given_by(
-        data: DataFrame, by: Optional[list]
+        data: DataFrame, by: Optional[List]
     ) -> Union[DataFrame, Dict[str, Union[DataFrame, Series]]]:
         """
         Create data for iteration given `by` is assigned or not, and it is only
@@ -279,16 +279,19 @@ def _create_iter_data_given_by(
         >>> mi = MultiIndex.from_tuples(tuples)
         >>> value = [[1, 3, np.nan, np.nan],
         ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
-        >>> data = DataFrame(value, columns=mi) # doctest: +SKIP
+        >>> data = DataFrame(value, columns=mi)
         >>> _create_iter_data_given_by(data, by=["col1"]) # doctest: +SKIP
         {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
          'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
         """
+        iter_data: Union[DataFrame, Dict[str, Union[DataFrame, Series]]]
         if not by:
             iter_data = data
         else:
             # Select sub-columns based on the value of first level of MI
-            cols = data.columns.levels[0]
+            # TODO: mypy complains because Index does not have levels, only MI has.
+            cols = data.columns.levels[0]  # type: ignore
+            print(data.columns.get_level_values(0))
             iter_data = {
                 col: data.loc[:, data.columns.get_level_values(0) == col]
                 for col in cols

From f76d2cbfbfab61fcd04a973fc6d22cab5109bad4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 5 May 2020 16:12:01 +0200
Subject: [PATCH 089/142] remove

---
 pandas/plotting/_matplotlib/core.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 60879ae9a3f76..d5460e9e3c493 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -291,7 +291,6 @@ def _create_iter_data_given_by(
             # Select sub-columns based on the value of first level of MI
             # TODO: mypy complains because Index does not have levels, only MI has.
             cols = data.columns.levels[0]  # type: ignore
-            print(data.columns.get_level_values(0))
             iter_data = {
                 col: data.loc[:, data.columns.get_level_values(0) == col]
                 for col in cols

From a5ecbd70116d96594a92f0d82d7f3397e4ed0e2a Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 11 May 2020 19:01:56 +0200
Subject: [PATCH 090/142] add missing annotation

---
 pandas/plotting/_matplotlib/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index d5460e9e3c493..7205680461260 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 
+from pandas._typing import Label
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
 
@@ -104,7 +105,7 @@ def __init__(
         table=False,
         layout=None,
         include_bool=False,
-        column=None,
+        column: Optional[Label] = None,
         **kwds,
     ):
 

From 7425dff980de752c943b47f2161d4c2701a16d3d Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Thu, 21 May 2020 20:08:34 +0200
Subject: [PATCH 091/142] code change on WA review

---
 pandas/plotting/_matplotlib/core.py | 7 ++++---
 pandas/plotting/_matplotlib/hist.py | 6 +++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 0d73eacbcfc76..9106b0cb0789c 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -2,6 +2,7 @@
 from typing import Dict, List, Optional, Union
 import warnings
 
+from matplotlib.artist import Artist
 import numpy as np
 
 from pandas._typing import Label
@@ -112,7 +113,7 @@ def __init__(
         import matplotlib.pyplot as plt
 
         self.data = data
-        self.by = [by] if not isinstance(by, list) and by is not None else by
+        self.by = com.maybe_make_list(by)
 
         if self.by:
             self._grouped_data_size = len(data.groupby(self.by))
@@ -169,8 +170,8 @@ def __init__(
 
         self.grid = grid
         self.legend = legend
-        self.legend_handles: List = []
-        self.legend_labels: List = []
+        self.legend_handles: List[Artist] = []
+        self.legend_labels: List[Label] = []
 
         for attr in self._pop_attributes:
             value = kwds.pop(attr, self._attr_defaults.get(attr, None))
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 61cbbc991ebe5..954e4ddb1d814 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -119,7 +119,11 @@ def _make_plot(self):
             self._add_legend_handle(artists[0], label, index=i)
 
     def _reformat_y(self, y: Union[Series, np.array]) -> Union[Series, np.array]:
-        """Internal function to reformat y given `by` is applied or not."""
+        """Internal function to reformat y given `by` is applied or not.
+
+        If by is None, input y is 1-d array; and if by is not None, groupby will take
+        place and input y is multi-dimensional array.
+        """
         if self.by is not None and len(y.shape) > 1:
             notna = [col[~isna(col)] for col in y.T]
             y = np.array(np.array(notna).T)

From b06e454c48c727c1660c7628d915626f0eeb6917 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 19 Jun 2020 20:26:31 +0200
Subject: [PATCH 092/142] solve mypy

---
 pandas/plotting/_matplotlib/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 4b8c8b8f70576..9cce2a8e87f78 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -284,7 +284,7 @@ def _create_iter_data_given_by(
         >>> value = [[1, 3, np.nan, np.nan],
         ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
         >>> data = DataFrame(value, columns=mi)
-        >>> _create_iter_data_given_by(data, by=["col1"]) # doctest: +SKIP
+        >>> _create_iter_data_given_by(data, by=["col1"])
         {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
          'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
         """
@@ -293,8 +293,8 @@ def _create_iter_data_given_by(
             iter_data = data
         else:
             # Select sub-columns based on the value of first level of MI
-            # TODO: mypy complains because Index does not have levels, only MI has.
-            cols = data.columns.levels[0]  # type: ignore
+            assert isinstance(data, MultiIndex)
+            cols = data.columns.levels[0]
             iter_data = {
                 col: data.loc[:, data.columns.get_level_values(0) == col]
                 for col in cols

From 79294eddb89cbe4ca884bc2f48a501902b2642b9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Fri, 19 Jun 2020 21:37:41 +0200
Subject: [PATCH 093/142] fix typo

---
 pandas/plotting/_matplotlib/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 9cce2a8e87f78..129f47d0d4718 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -293,7 +293,7 @@ def _create_iter_data_given_by(
             iter_data = data
         else:
             # Select sub-columns based on the value of first level of MI
-            assert isinstance(data, MultiIndex)
+            assert isinstance(data.columns, MultiIndex)
             cols = data.columns.levels[0]
             iter_data = {
                 col: data.loc[:, data.columns.get_level_values(0) == col]
@@ -457,7 +457,7 @@ def _reconstruct_data_with_by(self, data: DataFrame) -> DataFrame:
         --------
         >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]}
         >>> df = DataFrame(d)
-        >>> _reconstruct_data_with_by(df) # doctest: +SKIP
+        >>> _reconstruct_data_with_by(df)
            h1      h2
            a   b   a   b
         0  1   3   NaN NaN

From add406f08120532105bebe1fb9465565fae85fbf Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 23 Jun 2020 20:25:25 +0200
Subject: [PATCH 094/142] code change on reviews

---
 pandas/plotting/_matplotlib/core.py    | 94 ++-----------------------
 pandas/plotting/_matplotlib/grouped.py | 97 ++++++++++++++++++++++++++
 pandas/plotting/_matplotlib/hist.py    |  9 +--
 3 files changed, 104 insertions(+), 96 deletions(-)
 create mode 100644 pandas/plotting/_matplotlib/grouped.py

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 129f47d0d4718..09c647ceddb3f 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -27,13 +27,12 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas import DataFrame, MultiIndex, Series
 import pandas.core.common as com
-from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
+from pandas.plotting._matplotlib.grouped import reconstruct_data_with_by
 from pandas.plotting._matplotlib.style import _get_standard_colors
 from pandas.plotting._matplotlib.tools import (
     _flatten,
@@ -125,7 +124,8 @@ def __init__(
         if self.by and column is None:
             self.columns = [col for col in data.columns if col not in self.by]
         else:
-            self.columns = [column] if not isinstance(column, list) else column
+            self.columns = com.convert_to_list_like(column)
+            # self.columns = [column] if not isinstance(column, list) else column
 
         self.kind = kind
 
@@ -253,54 +253,6 @@ def _validate_color_args(self):
                             "pass 'style' without a color symbol"
                         )
 
-    @staticmethod
-    def _create_iter_data_given_by(
-        data: DataFrame, by: Optional[List]
-    ) -> Union[DataFrame, Dict[str, Union[DataFrame, Series]]]:
-        """
-        Create data for iteration given `by` is assigned or not, and it is only
-        used in both hist and boxplot.
-
-        If `by` is assigned, return a dictionary of DataFrames in which the key of
-        dictionary is the values in groups.
-        If `by` is not assigned, return input as is, and this preserves current
-        status of iter_data.
-
-        Parameters
-        ----------
-        data: reformatted grouped data from `_compute_plot_data` method
-        by: list or None, value assigned to `by`.
-
-        Returns
-        -------
-        iter_data: DataFrame or Dictionary of DataFrames
-
-        Examples
-        --------
-        If `by` is assigned:
-
-        >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')]
-        >>> mi = MultiIndex.from_tuples(tuples)
-        >>> value = [[1, 3, np.nan, np.nan],
-        ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
-        >>> data = DataFrame(value, columns=mi)
-        >>> _create_iter_data_given_by(data, by=["col1"])
-        {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
-         'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
-        """
-        iter_data: Union[DataFrame, Dict[str, Union[DataFrame, Series]]]
-        if not by:
-            iter_data = data
-        else:
-            # Select sub-columns based on the value of first level of MI
-            assert isinstance(data.columns, MultiIndex)
-            cols = data.columns.levels[0]
-            iter_data = {
-                col: data.loc[:, data.columns.get_level_values(0) == col]
-                for col in cols
-            }
-        return iter_data
-
     def _iter_data(self, data=None, keep_index=False, fillna=None):
         if data is None:
             data = self.data
@@ -438,44 +390,6 @@ def result(self):
             else:
                 return self.axes[0]
 
-    def _reconstruct_data_with_by(self, data: DataFrame) -> DataFrame:
-        """
-        Internal function to group data, and reassign multiindex column names onto the
-        result in order to let grouped data be used in _compute_plot_data method.
-
-        Parameters
-        ----------
-        data: Original DataFrame to plot
-
-        Returns
-        -------
-        Output is the reconstructed DataFrame with MultiIndex columns. The first level
-        of MI is unique values of groups, and second level of MI is the columns
-        selected by users.
-
-        Examples
-        --------
-        >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]}
-        >>> df = DataFrame(d)
-        >>> _reconstruct_data_with_by(df)
-           h1      h2
-           a   b   a   b
-        0  1   3   NaN NaN
-        1  3   4   NaN NaN
-        2  NaN NaN 5   6
-        """
-        grouped = data.groupby(self.by)
-
-        data_list = []
-        for key, group in grouped:
-            columns = MultiIndex.from_product([[key], self.columns])
-            sub_group = group[self.columns]
-            sub_group.columns = columns
-            data_list.append(sub_group)
-
-        data = concat(data_list, axis=1)
-        return data
-
     def _compute_plot_data(self):
         data = self.data
 
@@ -488,7 +402,7 @@ def _compute_plot_data(self):
         # GH15079 reconstruct data if by is defined
         if self.by is not None:
             self.subplots = True
-            data = self._reconstruct_data_with_by(self.data)
+            data = reconstruct_data_with_by(self.data, by=self.by, cols=self.columns)
 
         # GH16953, _convert is needed as fallback, for ``Series``
         # with ``dtype == object``
diff --git a/pandas/plotting/_matplotlib/grouped.py b/pandas/plotting/_matplotlib/grouped.py
new file mode 100644
index 0000000000000..cac6189d7c91b
--- /dev/null
+++ b/pandas/plotting/_matplotlib/grouped.py
@@ -0,0 +1,97 @@
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+
+from pandas._typing import Label
+
+from pandas import DataFrame, MultiIndex, Series, concat
+
+
+def create_iter_data_given_by(
+    data: DataFrame, by: Optional[List]
+) -> Union[DataFrame, Dict[str, Union[DataFrame, Series]]]:
+    """
+    Create data for iteration given `by` is assigned or not, and it is only
+    used in both hist and boxplot.
+
+    If `by` is assigned, return a dictionary of DataFrames in which the key of
+    dictionary is the values in groups.
+    If `by` is not assigned, return input as is, and this preserves current
+    status of iter_data.
+
+    Parameters
+    ----------
+    data: reformatted grouped data from `_compute_plot_data` method
+    by: list or None, value assigned to `by`.
+
+    Returns
+    -------
+    iter_data: DataFrame or Dictionary of DataFrames
+
+    Examples
+    --------
+    If `by` is assigned:
+
+    >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')]
+    >>> mi = MultiIndex.from_tuples(tuples)
+    >>> value = [[1, 3, np.nan, np.nan],
+    ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
+    >>> data = DataFrame(value, columns=mi)
+    >>> create_iter_data_given_by(data, by=["col1"])
+    {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
+     'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
+    """
+    iter_data: Union[DataFrame, Dict[str, Union[DataFrame, Series]]]
+    if not by:
+        iter_data = data
+    else:
+        # Select sub-columns based on the value of first level of MI
+        assert isinstance(data.columns, MultiIndex)
+        cols = data.columns.levels[0]
+        iter_data = {
+            col: data.loc[:, data.columns.get_level_values(0) == col] for col in cols
+        }
+    return iter_data
+
+
+def reconstruct_data_with_by(
+    data: DataFrame, by: Union[Label, List[Label]], cols: List[Label]
+) -> DataFrame:
+    """
+    Internal function to group data, and reassign multiindex column names onto the
+    result in order to let grouped data be used in _compute_plot_data method.
+
+    Parameters
+    ----------
+    data: Original DataFrame to plot
+    by: grouped `by` parameter selected by users
+    cols: columns of data set (excluding columns used in `by`)
+
+    Returns
+    -------
+    Output is the reconstructed DataFrame with MultiIndex columns. The first level
+    of MI is unique values of groups, and second level of MI is the columns
+    selected by users.
+
+    Examples
+    --------
+    >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]}
+    >>> df = DataFrame(d)
+    >>> reconstruct_data_with_by(df, by='h', cols=['a', 'b'])
+       h1      h2
+       a   b   a   b
+    0  1   3   NaN NaN
+    1  3   4   NaN NaN
+    2  NaN NaN 5   6
+    """
+    grouped = data.groupby(by)
+
+    data_list = []
+    for key, group in grouped:
+        columns = MultiIndex.from_product([[key], cols])
+        sub_group = group[cols]
+        sub_group.columns = columns
+        data_list.append(sub_group)
+
+    data = concat(data_list, axis=1)
+    return data
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 954e4ddb1d814..363d78ccaa5f3 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -11,6 +11,7 @@
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
+from pandas.plotting._matplotlib.grouped import create_iter_data_given_by
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
 
 
@@ -33,17 +34,13 @@ def _args_adjust(self):
 
             else:
                 grouped = self.data.groupby(self.by)[self.columns]
-                bins_list = []
-                for key, group in grouped:
-                    bins_list.append(self._calculate_bins(group))
-                self.bins = bins_list
+                self.bins = [self._calculate_bins(group) for key, group in grouped]
 
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 
     def _calculate_bins(self, data: DataFrame) -> np.array:
         """Calculate bins given data"""
-
         values = data._convert(datetime=True)._get_numeric_data()
         values = np.ravel(values)
         values = values[~isna(values)]
@@ -78,7 +75,7 @@ def _plot(
     def _make_plot(self):
         colors = self._get_colors()
         stacking_id = self._get_stacking_id()
-        data = self._create_iter_data_given_by(self.data, self.by)
+        data = create_iter_data_given_by(self.data, self.by)
 
         for i, (label, y) in enumerate(self._iter_data(data=data)):
             ax = self._get_ax(i)

From bb22c533e144b90f26298a238d7228b295a2a168 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Tue, 23 Jun 2020 21:00:31 +0200
Subject: [PATCH 095/142] fix linting

---
 pandas/plotting/_matplotlib/core.py    | 3 +--
 pandas/plotting/_matplotlib/grouped.py | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 09c647ceddb3f..286d2783ccb2b 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1,5 +1,5 @@
 import re
-from typing import Dict, List, Optional, Union
+from typing import List, Optional
 import warnings
 
 from matplotlib.artist import Artist
@@ -125,7 +125,6 @@ def __init__(
             self.columns = [col for col in data.columns if col not in self.by]
         else:
             self.columns = com.convert_to_list_like(column)
-            # self.columns = [column] if not isinstance(column, list) else column
 
         self.kind = kind
 
diff --git a/pandas/plotting/_matplotlib/grouped.py b/pandas/plotting/_matplotlib/grouped.py
index cac6189d7c91b..3373c1d247449 100644
--- a/pandas/plotting/_matplotlib/grouped.py
+++ b/pandas/plotting/_matplotlib/grouped.py
@@ -1,7 +1,5 @@
 from typing import Dict, List, Optional, Union
 
-import numpy as np
-
 from pandas._typing import Label
 
 from pandas import DataFrame, MultiIndex, Series, concat
@@ -32,6 +30,7 @@ def create_iter_data_given_by(
     --------
     If `by` is assigned:
 
+    >>> import numpy as np
     >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')]
     >>> mi = MultiIndex.from_tuples(tuples)
     >>> value = [[1, 3, np.nan, np.nan],

From 25214e6ce159500368113c3a59ada21c94928349 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 27 Jun 2020 21:41:25 +0200
Subject: [PATCH 096/142] rename

---
 pandas/plotting/_matplotlib/core.py                    | 2 +-
 pandas/plotting/_matplotlib/{grouped.py => groupby.py} | 0
 pandas/plotting/_matplotlib/hist.py                    | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename pandas/plotting/_matplotlib/{grouped.py => groupby.py} (100%)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 286d2783ccb2b..1a3cf9b906dc6 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -32,7 +32,7 @@
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
-from pandas.plotting._matplotlib.grouped import reconstruct_data_with_by
+from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
 from pandas.plotting._matplotlib.style import _get_standard_colors
 from pandas.plotting._matplotlib.tools import (
     _flatten,
diff --git a/pandas/plotting/_matplotlib/grouped.py b/pandas/plotting/_matplotlib/groupby.py
similarity index 100%
rename from pandas/plotting/_matplotlib/grouped.py
rename to pandas/plotting/_matplotlib/groupby.py
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 363d78ccaa5f3..417995eb18451 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -11,7 +11,7 @@
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
-from pandas.plotting._matplotlib.grouped import create_iter_data_given_by
+from pandas.plotting._matplotlib.groupby import create_iter_data_given_by
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
 
 

From 77e46f4cf5b9f13e79226d444a54024a5b159b9c Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 27 Jun 2020 21:48:38 +0200
Subject: [PATCH 097/142] modulize reformat_y for hist

---
 pandas/plotting/_matplotlib/groupby.py | 20 ++++++++++++++++++++
 pandas/plotting/_matplotlib/hist.py    | 21 +++++----------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 3373c1d247449..33165709c2af5 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,7 +1,11 @@
 from typing import Dict, List, Optional, Union
 
+import numpy as np
+
 from pandas._typing import Label
 
+from pandas.core.dtypes.missing import isna
+
 from pandas import DataFrame, MultiIndex, Series, concat
 
 
@@ -94,3 +98,19 @@ def reconstruct_data_with_by(
 
     data = concat(data_list, axis=1)
     return data
+
+
+def reformat_hist_y_given_by(
+    y: Union[Series, np.array], by: Optional[Union[Label, List[Label]]]
+) -> Union[Series, np.array]:
+    """Internal function to reformat y given `by` is applied or not for hist plot.
+
+    If by is None, input y is 1-d array; and if by is not None, groupby will take
+    place and input y is multi-dimensional array.
+    """
+    if by is not None and len(y.shape) > 1:
+        notna = [col[~isna(col)] for col in y.T]
+        y = np.array(np.array(notna).T)
+    else:
+        y = y[~isna(y)]
+    return y
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 8556725d15df9..e80e0519b725e 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -7,11 +7,13 @@
 from pandas.core.dtypes.missing import isna, remove_na_arraylike
 
 from pandas.core.frame import DataFrame
-from pandas.core.series import Series
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
-from pandas.plotting._matplotlib.groupby import create_iter_data_given_by
+from pandas.plotting._matplotlib.groupby import (
+    create_iter_data_given_by,
+    reformat_hist_y_given_by,
+)
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
 
 
@@ -98,7 +100,7 @@ def _make_plot(self):
                 kwds["label"] = self.columns
                 kwds.pop("color")
 
-            y = self._reformat_y(y)
+            y = reformat_hist_y_given_by(y, self.by)
 
             # We allow weights to be a multi-dimensional array, e.g. a (10, 2) array,
             # and each sub-array (10,) will be called in each iteration. If users only
@@ -115,19 +117,6 @@ def _make_plot(self):
 
             self._add_legend_handle(artists[0], label, index=i)
 
-    def _reformat_y(self, y: Union[Series, np.array]) -> Union[Series, np.array]:
-        """Internal function to reformat y given `by` is applied or not.
-
-        If by is None, input y is 1-d array; and if by is not None, groupby will take
-        place and input y is multi-dimensional array.
-        """
-        if self.by is not None and len(y.shape) > 1:
-            notna = [col[~isna(col)] for col in y.T]
-            y = np.array(np.array(notna).T)
-        else:
-            y = y[~isna(y)]
-        return y
-
     def _make_plot_keywords(self, kwds, y):
         """merge BoxPlot/KdePlot properties to passed kwds"""
         # y is required for KdePlot

From 9de9c617d652f21075ac7eec17850d8d7793abe9 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 27 Jun 2020 21:52:25 +0200
Subject: [PATCH 098/142] better annotation

---
 pandas/plotting/_matplotlib/groupby.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 33165709c2af5..c4795e35e0004 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from pandas._typing import Label
+from pandas._typing import FrameOrSeriesUnion, Label
 
 from pandas.core.dtypes.missing import isna
 
@@ -11,7 +11,7 @@
 
 def create_iter_data_given_by(
     data: DataFrame, by: Optional[List]
-) -> Union[DataFrame, Dict[str, Union[DataFrame, Series]]]:
+) -> Union[DataFrame, Dict[str, FrameOrSeriesUnion]]:
     """
     Create data for iteration given `by` is assigned or not, and it is only
     used in both hist and boxplot.
@@ -44,7 +44,7 @@ def create_iter_data_given_by(
     {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
      'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
     """
-    iter_data: Union[DataFrame, Dict[str, Union[DataFrame, Series]]]
+    iter_data: Union[DataFrame, Dict[str, FrameOrSeriesUnion]]
     if not by:
         iter_data = data
     else:

From af68d2ea647d6fb03d63b6eaaf8d495e66bce18b Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 27 Jun 2020 21:52:57 +0200
Subject: [PATCH 099/142] improve annotation

---
 pandas/plotting/_matplotlib/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index c4795e35e0004..047b10ee78fc5 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -10,7 +10,7 @@
 
 
 def create_iter_data_given_by(
-    data: DataFrame, by: Optional[List]
+    data: DataFrame, by: Optional[List[Label]]
 ) -> Union[DataFrame, Dict[str, FrameOrSeriesUnion]]:
     """
     Create data for iteration given `by` is assigned or not, and it is only

From b75015ac9b8d93761b7e21e82e10c5e892c5b074 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 27 Jun 2020 21:53:53 +0200
Subject: [PATCH 100/142] fix linting

---
 pandas/plotting/_matplotlib/hist.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index e80e0519b725e..c3f4ceff9f904 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 import numpy as np
 
 from pandas.core.dtypes.common import is_integer, is_list_like

From b90303d930be5f389f395f3bfbd259e0eb4bcf69 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 27 Jun 2020 22:10:10 +0200
Subject: [PATCH 101/142] improve docstring

---
 pandas/plotting/_matplotlib/groupby.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 047b10ee78fc5..061f95aacec90 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -105,8 +105,8 @@ def reformat_hist_y_given_by(
 ) -> Union[Series, np.array]:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 
-    If by is None, input y is 1-d array; and if by is not None, groupby will take
-    place and input y is multi-dimensional array.
+    If by is None, input y is 1-d with NaN removed; and if by is not None, groupby
+    will take place and input y is multi-dimensional array.
     """
     if by is not None and len(y.shape) > 1:
         notna = [col[~isna(col)] for col in y.T]

From 2ac32f5b4cf838e9b8ba653bd9bdd91bebd0975b Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Fri, 21 May 2021 20:23:48 +0200
Subject: [PATCH 102/142] remove added test file

---
 pandas/tests/plotting/test_frame.py   | 3533 -------------------------
 pandas/tests/plotting/test_hist_by.py |  112 +
 2 files changed, 112 insertions(+), 3533 deletions(-)
 delete mode 100644 pandas/tests/plotting/test_frame.py
 create mode 100644 pandas/tests/plotting/test_hist_by.py

diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
deleted file mode 100644
index 4392ef6d04d34..0000000000000
--- a/pandas/tests/plotting/test_frame.py
+++ /dev/null
@@ -1,3533 +0,0 @@
-""" Test cases for DataFrame.plot """
-
-from datetime import date, datetime
-import itertools
-import re
-import string
-import warnings
-
-import numpy as np
-from numpy.random import rand, randn
-import pytest
-
-import pandas.util._test_decorators as td
-
-from pandas.core.dtypes.api import is_list_like
-
-import pandas as pd
-from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range
-import pandas._testing as tm
-from pandas.core.arrays import integer_array
-from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
-
-from pandas.io.formats.printing import pprint_thing
-import pandas.plotting as plotting
-
-
-@pytest.fixture(scope="module")
-def test_hist_with_by_df():
-    np.random.seed(0)
-    df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-    df["C"] = np.random.choice(["a", "b", "c"], 30)
-    df["D"] = np.random.choice(["a", "b", "c"], 30)
-    return df
-
-
-@td.skip_if_no_mpl
-class TestDataFramePlots(TestPlotBase):
-    def setup_method(self, method):
-        TestPlotBase.setup_method(self, method)
-        import matplotlib as mpl
-
-        mpl.rcdefaults()
-
-        self.tdf = tm.makeTimeDataFrame()
-        self.hexbin_df = DataFrame(
-            {
-                "A": np.random.uniform(size=20),
-                "B": np.random.uniform(size=20),
-                "C": np.arange(20) + np.random.uniform(size=20),
-            }
-        )
-
-    def _assert_ytickslabels_visibility(self, axes, expected):
-        for ax, exp in zip(axes, expected):
-            self._check_visible(ax.get_yticklabels(), visible=exp)
-
-    def _assert_xtickslabels_visibility(self, axes, expected):
-        for ax, exp in zip(axes, expected):
-            self._check_visible(ax.get_xticklabels(), visible=exp)
-
-    @pytest.mark.xfail(reason="Waiting for PR 34334", strict=True)
-    @pytest.mark.slow
-    def test_plot(self):
-        from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0
-
-        df = self.tdf
-        _check_plot_works(df.plot, grid=False)
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot, subplots=True)
-        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
-
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot, subplots=True, layout=(-1, 2))
-        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
-
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot, subplots=True, use_index=False)
-        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
-
-        df = DataFrame({"x": [1, 2], "y": [3, 4]})
-        if _mpl_ge_3_1_0():
-            msg = "'Line2D' object has no property 'blarg'"
-        else:
-            msg = "Unknown property blarg"
-        with pytest.raises(AttributeError, match=msg):
-            df.plot.line(blarg=True)
-
-        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
-
-        _check_plot_works(df.plot, use_index=True)
-        _check_plot_works(df.plot, sort_columns=False)
-        _check_plot_works(df.plot, yticks=[1, 5, 10])
-        _check_plot_works(df.plot, xticks=[1, 5, 10])
-        _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100))
-
-        with tm.assert_produces_warning(UserWarning):
-            _check_plot_works(df.plot, subplots=True, title="blah")
-
-        # We have to redo it here because _check_plot_works does two plots,
-        # once without an ax kwarg and once with an ax kwarg and the new sharex
-        # behaviour does not remove the visibility of the latter axis (as ax is
-        # present).  see: https://github.com/pandas-dev/pandas/issues/9737
-
-        axes = df.plot(subplots=True, title="blah")
-        self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
-        # axes[0].figure.savefig("test.png")
-        for ax in axes[:2]:
-            self._check_visible(ax.xaxis)  # xaxis must be visible for grid
-            self._check_visible(ax.get_xticklabels(), visible=False)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=False)
-            self._check_visible([ax.xaxis.get_label()], visible=False)
-        for ax in [axes[2]]:
-            self._check_visible(ax.xaxis)
-            self._check_visible(ax.get_xticklabels())
-            self._check_visible([ax.xaxis.get_label()])
-            self._check_ticks_props(ax, xrot=0)
-
-        _check_plot_works(df.plot, title="blah")
-
-        tuples = zip(string.ascii_letters[:10], range(10))
-        df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
-        _check_plot_works(df.plot, use_index=True)
-
-        # unicode
-        index = MultiIndex.from_tuples(
-            [
-                ("\u03b1", 0),
-                ("\u03b1", 1),
-                ("\u03b2", 2),
-                ("\u03b2", 3),
-                ("\u03b3", 4),
-                ("\u03b3", 5),
-                ("\u03b4", 6),
-                ("\u03b4", 7),
-            ],
-            names=["i0", "i1"],
-        )
-        columns = MultiIndex.from_tuples(
-            [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"]
-        )
-        df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index)
-        _check_plot_works(df.plot, title="\u03A3")
-
-        # GH 6951
-        # Test with single column
-        df = DataFrame({"x": np.random.rand(10)})
-        axes = _check_plot_works(df.plot.bar, subplots=True)
-        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
-
-        axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1))
-        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
-        # When ax is supplied and required number of axes is 1,
-        # passed ax should be used:
-        fig, ax = self.plt.subplots()
-        axes = df.plot.bar(subplots=True, ax=ax)
-        assert len(axes) == 1
-        result = ax.axes
-        assert result is axes[0]
-
-    def test_integer_array_plot(self):
-        # GH 25587
-        arr = integer_array([1, 2, 3, 4], dtype="UInt32")
-
-        s = Series(arr)
-        _check_plot_works(s.plot.line)
-        _check_plot_works(s.plot.bar)
-        _check_plot_works(s.plot.hist)
-        _check_plot_works(s.plot.pie)
-
-        df = DataFrame({"x": arr, "y": arr})
-        _check_plot_works(df.plot.line)
-        _check_plot_works(df.plot.bar)
-        _check_plot_works(df.plot.hist)
-        _check_plot_works(df.plot.pie, y="y")
-        _check_plot_works(df.plot.scatter, x="x", y="y")
-        _check_plot_works(df.plot.hexbin, x="x", y="y")
-
-    def test_mpl2_color_cycle_str(self):
-        # GH 15516
-        colors = ["C" + str(x) for x in range(10)]
-        df = DataFrame(randn(10, 3), columns=["a", "b", "c"])
-        for c in colors:
-            _check_plot_works(df.plot, color=c)
-
-    def test_color_single_series_list(self):
-        # GH 3486
-        df = DataFrame({"A": [1, 2, 3]})
-        _check_plot_works(df.plot, color=["red"])
-
-    def test_rgb_tuple_color(self):
-        # GH 16695
-        df = DataFrame({"x": [1, 2], "y": [3, 4]})
-        _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0))
-        _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5))
-
-    def test_color_empty_string(self):
-        df = DataFrame(randn(10, 2))
-        with pytest.raises(ValueError):
-            df.plot(color="")
-
-    def test_color_and_style_arguments(self):
-        df = DataFrame({"x": [1, 2], "y": [3, 4]})
-        # passing both 'color' and 'style' arguments should be allowed
-        # if there is no color symbol in the style strings:
-        ax = df.plot(color=["red", "black"], style=["-", "--"])
-        # check that the linestyles are correctly set:
-        linestyle = [line.get_linestyle() for line in ax.lines]
-        assert linestyle == ["-", "--"]
-        # check that the colors are correctly set:
-        color = [line.get_color() for line in ax.lines]
-        assert color == ["red", "black"]
-        # passing both 'color' and 'style' arguments should not be allowed
-        # if there is a color symbol in the style strings:
-        with pytest.raises(ValueError):
-            df.plot(color=["red", "black"], style=["k-", "r--"])
-
-    def test_nonnumeric_exclude(self):
-        df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]})
-        ax = df.plot()
-        assert len(ax.get_lines()) == 1  # B was plotted
-
-    @pytest.mark.slow
-    def test_implicit_label(self):
-        df = DataFrame(randn(10, 3), columns=["a", "b", "c"])
-        ax = df.plot(x="a", y="b")
-        self._check_text_labels(ax.xaxis.get_label(), "a")
-
-    @pytest.mark.slow
-    def test_donot_overwrite_index_name(self):
-        # GH 8494
-        df = DataFrame(randn(2, 2), columns=["a", "b"])
-        df.index.name = "NAME"
-        df.plot(y="b", label="LABEL")
-        assert df.index.name == "NAME"
-
-    @pytest.mark.slow
-    def test_plot_xy(self):
-        # columns.inferred_type == 'string'
-        df = self.tdf
-        self._check_data(df.plot(x=0, y=1), df.set_index("A")["B"].plot())
-        self._check_data(df.plot(x=0), df.set_index("A").plot())
-        self._check_data(df.plot(y=0), df.B.plot())
-        self._check_data(df.plot(x="A", y="B"), df.set_index("A").B.plot())
-        self._check_data(df.plot(x="A"), df.set_index("A").plot())
-        self._check_data(df.plot(y="B"), df.B.plot())
-
-        # columns.inferred_type == 'integer'
-        df.columns = np.arange(1, len(df.columns) + 1)
-        self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot())
-        self._check_data(df.plot(x=1), df.set_index(1).plot())
-        self._check_data(df.plot(y=1), df[1].plot())
-
-        # figsize and title
-        ax = df.plot(x=1, y=2, title="Test", figsize=(16, 8))
-        self._check_text_labels(ax.title, "Test")
-        self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16.0, 8.0))
-
-        # columns.inferred_type == 'mixed'
-        # TODO add MultiIndex test
-
-    @pytest.mark.slow
-    @pytest.mark.parametrize(
-        "input_log, expected_log", [(True, "log"), ("sym", "symlog")]
-    )
-    def test_logscales(self, input_log, expected_log):
-        df = DataFrame({"a": np.arange(100)}, index=np.arange(100))
-
-        ax = df.plot(logy=input_log)
-        self._check_ax_scales(ax, yaxis=expected_log)
-        assert ax.get_yscale() == expected_log
-
-        ax = df.plot(logx=input_log)
-        self._check_ax_scales(ax, xaxis=expected_log)
-        assert ax.get_xscale() == expected_log
-
-        ax = df.plot(loglog=input_log)
-        self._check_ax_scales(ax, xaxis=expected_log, yaxis=expected_log)
-        assert ax.get_xscale() == expected_log
-        assert ax.get_yscale() == expected_log
-
-    @pytest.mark.parametrize("input_param", ["logx", "logy", "loglog"])
-    def test_invalid_logscale(self, input_param):
-        # GH: 24867
-        df = DataFrame({"a": np.arange(100)}, index=np.arange(100))
-
-        msg = "Boolean, None and 'sym' are valid options, 'sm' is given."
-        with pytest.raises(ValueError, match=msg):
-            df.plot(**{input_param: "sm"})
-
-    @pytest.mark.slow
-    def test_xcompat(self):
-        import pandas as pd
-
-        df = self.tdf
-        ax = df.plot(x_compat=True)
-        lines = ax.get_lines()
-        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
-
-        tm.close()
-        pd.plotting.plot_params["xaxis.compat"] = True
-        ax = df.plot()
-        lines = ax.get_lines()
-        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
-
-        tm.close()
-        pd.plotting.plot_params["x_compat"] = False
-
-        ax = df.plot()
-        lines = ax.get_lines()
-        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
-        assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex)
-
-        tm.close()
-        # useful if you're plotting a bunch together
-        with pd.plotting.plot_params.use("x_compat", True):
-            ax = df.plot()
-            lines = ax.get_lines()
-            assert not isinstance(lines[0].get_xdata(), PeriodIndex)
-
-        tm.close()
-        ax = df.plot()
-        lines = ax.get_lines()
-        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
-        assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex)
-
-    def test_period_compat(self):
-        # GH 9012
-        # period-array conversions
-        df = DataFrame(
-            np.random.rand(21, 2),
-            index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)),
-            columns=["a", "b"],
-        )
-
-        df.plot()
-        self.plt.axhline(y=0)
-        tm.close()
-
-    def test_unsorted_index(self):
-        df = DataFrame(
-            {"y": np.arange(100)}, index=np.arange(99, -1, -1), dtype=np.int64
-        )
-        ax = df.plot()
-        lines = ax.get_lines()[0]
-        rs = lines.get_xydata()
-        rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y")
-        tm.assert_series_equal(rs, df.y, check_index_type=False)
-        tm.close()
-
-        df.index = pd.Index(np.arange(99, -1, -1), dtype=np.float64)
-        ax = df.plot()
-        lines = ax.get_lines()[0]
-        rs = lines.get_xydata()
-        rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y")
-        tm.assert_series_equal(rs, df.y)
-
-    def test_unsorted_index_lims(self):
-        df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0])
-        ax = df.plot()
-        xmin, xmax = ax.get_xlim()
-        lines = ax.get_lines()
-        assert xmin <= np.nanmin(lines[0].get_data()[0])
-        assert xmax >= np.nanmax(lines[0].get_data()[0])
-
-        df = DataFrame(
-            {"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]},
-            index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0],
-        )
-        ax = df.plot()
-        xmin, xmax = ax.get_xlim()
-        lines = ax.get_lines()
-        assert xmin <= np.nanmin(lines[0].get_data()[0])
-        assert xmax >= np.nanmax(lines[0].get_data()[0])
-
-        df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]})
-        ax = df.plot(x="z", y="y")
-        xmin, xmax = ax.get_xlim()
-        lines = ax.get_lines()
-        assert xmin <= np.nanmin(lines[0].get_data()[0])
-        assert xmax >= np.nanmax(lines[0].get_data()[0])
-
-    @pytest.mark.slow
-    def test_subplots(self):
-        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
-
-        for kind in ["bar", "barh", "line", "area"]:
-            axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True)
-            self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
-            assert axes.shape == (3,)
-
-            for ax, column in zip(axes, df.columns):
-                self._check_legend_labels(ax, labels=[pprint_thing(column)])
-
-            for ax in axes[:-2]:
-                self._check_visible(ax.xaxis)  # xaxis must be visible for grid
-                self._check_visible(ax.get_xticklabels(), visible=False)
-                if not (kind == "bar" and self.mpl_ge_3_1_0):
-                    # change https://github.com/pandas-dev/pandas/issues/26714
-                    self._check_visible(ax.get_xticklabels(minor=True), visible=False)
-                self._check_visible(ax.xaxis.get_label(), visible=False)
-                self._check_visible(ax.get_yticklabels())
-
-            self._check_visible(axes[-1].xaxis)
-            self._check_visible(axes[-1].get_xticklabels())
-            self._check_visible(axes[-1].get_xticklabels(minor=True))
-            self._check_visible(axes[-1].xaxis.get_label())
-            self._check_visible(axes[-1].get_yticklabels())
-
-            axes = df.plot(kind=kind, subplots=True, sharex=False)
-            for ax in axes:
-                self._check_visible(ax.xaxis)
-                self._check_visible(ax.get_xticklabels())
-                self._check_visible(ax.get_xticklabels(minor=True))
-                self._check_visible(ax.xaxis.get_label())
-                self._check_visible(ax.get_yticklabels())
-
-            axes = df.plot(kind=kind, subplots=True, legend=False)
-            for ax in axes:
-                assert ax.get_legend() is None
-
-    def test_groupby_boxplot_sharey(self):
-        # https://github.com/pandas-dev/pandas/issues/20968
-        # sharey can now be switched check whether the right
-        # pair of axes is turned on or off
-
-        df = DataFrame(
-            {
-                "a": [-1.43, -0.15, -3.70, -1.43, -0.14],
-                "b": [0.56, 0.84, 0.29, 0.56, 0.85],
-                "c": [0, 1, 2, 3, 1],
-            },
-            index=[0, 1, 2, 3, 4],
-        )
-
-        # behavior without keyword
-        axes = df.groupby("c").boxplot()
-        expected = [True, False, True, False]
-        self._assert_ytickslabels_visibility(axes, expected)
-
-        # set sharey=True should be identical
-        axes = df.groupby("c").boxplot(sharey=True)
-        expected = [True, False, True, False]
-        self._assert_ytickslabels_visibility(axes, expected)
-
-        # sharey=False, all yticklabels should be visible
-        axes = df.groupby("c").boxplot(sharey=False)
-        expected = [True, True, True, True]
-        self._assert_ytickslabels_visibility(axes, expected)
-
-    def test_groupby_boxplot_sharex(self):
-        # https://github.com/pandas-dev/pandas/issues/20968
-        # sharex can now be switched check whether the right
-        # pair of axes is turned on or off
-
-        df = DataFrame(
-            {
-                "a": [-1.43, -0.15, -3.70, -1.43, -0.14],
-                "b": [0.56, 0.84, 0.29, 0.56, 0.85],
-                "c": [0, 1, 2, 3, 1],
-            },
-            index=[0, 1, 2, 3, 4],
-        )
-
-        # behavior without keyword
-        axes = df.groupby("c").boxplot()
-        expected = [True, True, True, True]
-        self._assert_xtickslabels_visibility(axes, expected)
-
-        # set sharex=False should be identical
-        axes = df.groupby("c").boxplot(sharex=False)
-        expected = [True, True, True, True]
-        self._assert_xtickslabels_visibility(axes, expected)
-
-        # sharex=True, yticklabels should be visible
-        # only for bottom plots
-        axes = df.groupby("c").boxplot(sharex=True)
-        expected = [False, False, True, True]
-        self._assert_xtickslabels_visibility(axes, expected)
-
-    @pytest.mark.xfail(reason="Waiting for PR 34334", strict=True)
-    @pytest.mark.slow
-    def test_subplots_timeseries(self):
-        idx = date_range(start="2014-07-01", freq="M", periods=10)
-        df = DataFrame(np.random.rand(10, 3), index=idx)
-
-        for kind in ["line", "area"]:
-            axes = df.plot(kind=kind, subplots=True, sharex=True)
-            self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
-
-            for ax in axes[:-2]:
-                # GH 7801
-                self._check_visible(ax.xaxis)  # xaxis must be visible for grid
-                self._check_visible(ax.get_xticklabels(), visible=False)
-                self._check_visible(ax.get_xticklabels(minor=True), visible=False)
-                self._check_visible(ax.xaxis.get_label(), visible=False)
-                self._check_visible(ax.get_yticklabels())
-
-            self._check_visible(axes[-1].xaxis)
-            self._check_visible(axes[-1].get_xticklabels())
-            self._check_visible(axes[-1].get_xticklabels(minor=True))
-            self._check_visible(axes[-1].xaxis.get_label())
-            self._check_visible(axes[-1].get_yticklabels())
-            self._check_ticks_props(axes, xrot=0)
-
-            axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7)
-            for ax in axes:
-                self._check_visible(ax.xaxis)
-                self._check_visible(ax.get_xticklabels())
-                self._check_visible(ax.get_xticklabels(minor=True))
-                self._check_visible(ax.xaxis.get_label())
-                self._check_visible(ax.get_yticklabels())
-                self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7)
-
-    def test_subplots_timeseries_y_axis(self):
-        # GH16953
-        data = {
-            "numeric": np.array([1, 2, 5]),
-            "timedelta": [
-                pd.Timedelta(-10, unit="s"),
-                pd.Timedelta(10, unit="m"),
-                pd.Timedelta(10, unit="h"),
-            ],
-            "datetime_no_tz": [
-                pd.to_datetime("2017-08-01 00:00:00"),
-                pd.to_datetime("2017-08-01 02:00:00"),
-                pd.to_datetime("2017-08-02 00:00:00"),
-            ],
-            "datetime_all_tz": [
-                pd.to_datetime("2017-08-01 00:00:00", utc=True),
-                pd.to_datetime("2017-08-01 02:00:00", utc=True),
-                pd.to_datetime("2017-08-02 00:00:00", utc=True),
-            ],
-            "text": ["This", "should", "fail"],
-        }
-        testdata = DataFrame(data)
-
-        ax_numeric = testdata.plot(y="numeric")
-        assert (
-            ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values
-        ).all()
-        ax_timedelta = testdata.plot(y="timedelta")
-        assert (
-            ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values
-        ).all()
-        ax_datetime_no_tz = testdata.plot(y="datetime_no_tz")
-        assert (
-            ax_datetime_no_tz.get_lines()[0].get_data()[1]
-            == testdata["datetime_no_tz"].values
-        ).all()
-        ax_datetime_all_tz = testdata.plot(y="datetime_all_tz")
-        assert (
-            ax_datetime_all_tz.get_lines()[0].get_data()[1]
-            == testdata["datetime_all_tz"].values
-        ).all()
-
-        msg = "no numeric data to plot"
-        with pytest.raises(TypeError, match=msg):
-            testdata.plot(y="text")
-
-    @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz")
-    def test_subplots_timeseries_y_axis_not_supported(self):
-        """
-        This test will fail for:
-            period:
-                since period isn't yet implemented in ``select_dtypes``
-                and because it will need a custom value converter +
-                tick formatter (as was done for x-axis plots)
-
-            categorical:
-                 because it will need a custom value converter +
-                 tick formatter (also doesn't work for x-axis, as of now)
-
-            datetime_mixed_tz:
-                because of the way how pandas handles ``Series`` of
-                ``datetime`` objects with different timezone,
-                generally converting ``datetime`` objects in a tz-aware
-                form could help with this problem
-        """
-        data = {
-            "numeric": np.array([1, 2, 5]),
-            "period": [
-                pd.Period("2017-08-01 00:00:00", freq="H"),
-                pd.Period("2017-08-01 02:00", freq="H"),
-                pd.Period("2017-08-02 00:00:00", freq="H"),
-            ],
-            "categorical": pd.Categorical(
-                ["c", "b", "a"], categories=["a", "b", "c"], ordered=False
-            ),
-            "datetime_mixed_tz": [
-                pd.to_datetime("2017-08-01 00:00:00", utc=True),
-                pd.to_datetime("2017-08-01 02:00:00"),
-                pd.to_datetime("2017-08-02 00:00:00"),
-            ],
-        }
-        testdata = pd.DataFrame(data)
-        ax_period = testdata.plot(x="numeric", y="period")
-        assert (
-            ax_period.get_lines()[0].get_data()[1] == testdata["period"].values
-        ).all()
-        ax_categorical = testdata.plot(x="numeric", y="categorical")
-        assert (
-            ax_categorical.get_lines()[0].get_data()[1]
-            == testdata["categorical"].values
-        ).all()
-        ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz")
-        assert (
-            ax_datetime_mixed_tz.get_lines()[0].get_data()[1]
-            == testdata["datetime_mixed_tz"].values
-        ).all()
-
-    @pytest.mark.slow
-    def test_subplots_layout(self):
-        # GH 6667
-        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
-
-        axes = df.plot(subplots=True, layout=(2, 2))
-        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
-        assert axes.shape == (2, 2)
-
-        axes = df.plot(subplots=True, layout=(-1, 2))
-        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
-        assert axes.shape == (2, 2)
-
-        axes = df.plot(subplots=True, layout=(2, -1))
-        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
-        assert axes.shape == (2, 2)
-
-        axes = df.plot(subplots=True, layout=(1, 4))
-        self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
-        assert axes.shape == (1, 4)
-
-        axes = df.plot(subplots=True, layout=(-1, 4))
-        self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
-        assert axes.shape == (1, 4)
-
-        axes = df.plot(subplots=True, layout=(4, -1))
-        self._check_axes_shape(axes, axes_num=3, layout=(4, 1))
-        assert axes.shape == (4, 1)
-
-        with pytest.raises(ValueError):
-            df.plot(subplots=True, layout=(1, 1))
-        with pytest.raises(ValueError):
-            df.plot(subplots=True, layout=(-1, -1))
-
-        # single column
-        df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
-        axes = df.plot(subplots=True)
-        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
-        assert axes.shape == (1,)
-
-        axes = df.plot(subplots=True, layout=(3, 3))
-        self._check_axes_shape(axes, axes_num=1, layout=(3, 3))
-        assert axes.shape == (3, 3)
-
-    @pytest.mark.slow
-    def test_subplots_warnings(self):
-        # GH 9464
-        with tm.assert_produces_warning(None):
-            df = DataFrame(np.random.randn(100, 4))
-            df.plot(subplots=True, layout=(3, 2))
-
-            df = DataFrame(
-                np.random.randn(100, 4), index=date_range("1/1/2000", periods=100)
-            )
-            df.plot(subplots=True, layout=(3, 2))
-
-    @pytest.mark.slow
-    def test_subplots_multiple_axes(self):
-        # GH 5353, 6970, GH 7069
-        fig, axes = self.plt.subplots(2, 3)
-        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
-
-        returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False)
-        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
-        assert returned.shape == (3,)
-        assert returned[0].figure is fig
-        # draw on second row
-        returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False)
-        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
-        assert returned.shape == (3,)
-        assert returned[0].figure is fig
-        self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
-        tm.close()
-
-        with pytest.raises(ValueError):
-            fig, axes = self.plt.subplots(2, 3)
-            # pass different number of axes from required
-            df.plot(subplots=True, ax=axes)
-
-        # pass 2-dim axes and invalid layout
-        # invalid lauout should not affect to input and return value
-        # (show warning is tested in
-        # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes
-        fig, axes = self.plt.subplots(2, 2)
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", UserWarning)
-            df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10]))
-
-            returned = df.plot(
-                subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False
-            )
-            self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
-            assert returned.shape == (4,)
-
-            returned = df.plot(
-                subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False
-            )
-            self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
-            assert returned.shape == (4,)
-
-            returned = df.plot(
-                subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False
-            )
-        self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
-        assert returned.shape == (4,)
-
-        # single column
-        fig, axes = self.plt.subplots(1, 1)
-        df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
-
-        axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False)
-        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
-        assert axes.shape == (1,)
-
-    def test_subplots_ts_share_axes(self):
-        # GH 3964
-        fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True)
-        self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3)
-        df = DataFrame(
-            np.random.randn(10, 9),
-            index=date_range(start="2014-07-01", freq="M", periods=10),
-        )
-        for i, ax in enumerate(axes.ravel()):
-            df[i].plot(ax=ax, fontsize=5)
-
-        # Rows other than bottom should not be visible
-        for ax in axes[0:-1].ravel():
-            self._check_visible(ax.get_xticklabels(), visible=False)
-
-        # Bottom row should be visible
-        for ax in axes[-1].ravel():
-            self._check_visible(ax.get_xticklabels(), visible=True)
-
-        # First column should be visible
-        for ax in axes[[0, 1, 2], [0]].ravel():
-            self._check_visible(ax.get_yticklabels(), visible=True)
-
-        # Other columns should not be visible
-        for ax in axes[[0, 1, 2], [1]].ravel():
-            self._check_visible(ax.get_yticklabels(), visible=False)
-        for ax in axes[[0, 1, 2], [2]].ravel():
-            self._check_visible(ax.get_yticklabels(), visible=False)
-
-    def test_subplots_sharex_axes_existing_axes(self):
-        # GH 9158
-        d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]}
-        df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14"))
-
-        axes = df[["A", "B"]].plot(subplots=True)
-        df["C"].plot(ax=axes[0], secondary_y=True)
-
-        self._check_visible(axes[0].get_xticklabels(), visible=False)
-        self._check_visible(axes[1].get_xticklabels(), visible=True)
-        for ax in axes.ravel():
-            self._check_visible(ax.get_yticklabels(), visible=True)
-
-    @pytest.mark.slow
-    def test_subplots_dup_columns(self):
-        # GH 10962
-        df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa"))
-        axes = df.plot(subplots=True)
-        for ax in axes:
-            self._check_legend_labels(ax, labels=["a"])
-            assert len(ax.lines) == 1
-        tm.close()
-
-        axes = df.plot(subplots=True, secondary_y="a")
-        for ax in axes:
-            # (right) is only attached when subplots=False
-            self._check_legend_labels(ax, labels=["a"])
-            assert len(ax.lines) == 1
-        tm.close()
-
-        ax = df.plot(secondary_y="a")
-        self._check_legend_labels(ax, labels=["a (right)"] * 5)
-        assert len(ax.lines) == 0
-        assert len(ax.right_ax.lines) == 5
-
-    def test_negative_log(self):
-        df = -DataFrame(
-            rand(6, 4),
-            index=list(string.ascii_letters[:6]),
-            columns=["x", "y", "z", "four"],
-        )
-
-        with pytest.raises(ValueError):
-            df.plot.area(logy=True)
-        with pytest.raises(ValueError):
-            df.plot.area(loglog=True)
-
-    def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
-        base = np.zeros(len(normal_lines[0].get_data()[1]))
-        for nl, sl in zip(normal_lines, stacked_lines):
-            base += nl.get_data()[1]  # get y coordinates
-            sy = sl.get_data()[1]
-            tm.assert_numpy_array_equal(base, sy)
-
-    def test_line_area_stacked(self):
-        with tm.RNGContext(42):
-            df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"])
-            neg_df = -df
-            # each column has either positive or negative value
-            sep_df = DataFrame(
-                {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)}
-            )
-            # each column has positive-negative mixed value
-            mixed_df = DataFrame(
-                randn(6, 4),
-                index=list(string.ascii_letters[:6]),
-                columns=["w", "x", "y", "z"],
-            )
-
-            for kind in ["line", "area"]:
-                ax1 = _check_plot_works(df.plot, kind=kind, stacked=False)
-                ax2 = _check_plot_works(df.plot, kind=kind, stacked=True)
-                self._compare_stacked_y_cood(ax1.lines, ax2.lines)
-
-                ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False)
-                ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True)
-                self._compare_stacked_y_cood(ax1.lines, ax2.lines)
-
-                ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False)
-                ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True)
-                self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2])
-                self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:])
-
-                _check_plot_works(mixed_df.plot, stacked=False)
-                with pytest.raises(ValueError):
-                    mixed_df.plot(stacked=True)
-
-                # Use an index with strictly positive values, preventing
-                #  matplotlib from warning about ignoring xlim
-                df2 = df.set_index(df.index + 1)
-                _check_plot_works(df2.plot, kind=kind, logx=True, stacked=True)
-
-    def test_line_area_nan_df(self):
-        values1 = [1, 2, np.nan, 3]
-        values2 = [3, np.nan, 2, 1]
-        df = DataFrame({"a": values1, "b": values2})
-        tdf = DataFrame({"a": values1, "b": values2}, index=tm.makeDateIndex(k=4))
-
-        for d in [df, tdf]:
-            ax = _check_plot_works(d.plot)
-            masked1 = ax.lines[0].get_ydata()
-            masked2 = ax.lines[1].get_ydata()
-            # remove nan for comparison purpose
-
-            exp = np.array([1, 2, 3], dtype=np.float64)
-            tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp)
-
-            exp = np.array([3, 2, 1], dtype=np.float64)
-            tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp)
-            tm.assert_numpy_array_equal(
-                masked1.mask, np.array([False, False, True, False])
-            )
-            tm.assert_numpy_array_equal(
-                masked2.mask, np.array([False, True, False, False])
-            )
-
-            expected1 = np.array([1, 2, 0, 3], dtype=np.float64)
-            expected2 = np.array([3, 0, 2, 1], dtype=np.float64)
-
-            ax = _check_plot_works(d.plot, stacked=True)
-            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
-            tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
-
-            ax = _check_plot_works(d.plot.area)
-            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
-            tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
-
-            ax = _check_plot_works(d.plot.area, stacked=False)
-            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
-            tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2)
-
-    def test_line_lim(self):
-        df = DataFrame(rand(6, 3), columns=["x", "y", "z"])
-        ax = df.plot()
-        xmin, xmax = ax.get_xlim()
-        lines = ax.get_lines()
-        assert xmin <= lines[0].get_data()[0][0]
-        assert xmax >= lines[0].get_data()[0][-1]
-
-        ax = df.plot(secondary_y=True)
-        xmin, xmax = ax.get_xlim()
-        lines = ax.get_lines()
-        assert xmin <= lines[0].get_data()[0][0]
-        assert xmax >= lines[0].get_data()[0][-1]
-
-        axes = df.plot(secondary_y=True, subplots=True)
-        self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
-        for ax in axes:
-            assert hasattr(ax, "left_ax")
-            assert not hasattr(ax, "right_ax")
-            xmin, xmax = ax.get_xlim()
-            lines = ax.get_lines()
-            assert xmin <= lines[0].get_data()[0][0]
-            assert xmax >= lines[0].get_data()[0][-1]
-
-    def test_area_lim(self):
-        df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"])
-
-        neg_df = -df
-        for stacked in [True, False]:
-            ax = _check_plot_works(df.plot.area, stacked=stacked)
-            xmin, xmax = ax.get_xlim()
-            ymin, ymax = ax.get_ylim()
-            lines = ax.get_lines()
-            assert xmin <= lines[0].get_data()[0][0]
-            assert xmax >= lines[0].get_data()[0][-1]
-            assert ymin == 0
-
-            ax = _check_plot_works(neg_df.plot.area, stacked=stacked)
-            ymin, ymax = ax.get_ylim()
-            assert ymax == 0
-
-    @pytest.mark.slow
-    def test_bar_colors(self):
-        import matplotlib.pyplot as plt
-
-        default_colors = self._unpack_cycler(plt.rcParams)
-
-        df = DataFrame(randn(5, 5))
-        ax = df.plot.bar()
-        self._check_colors(ax.patches[::5], facecolors=default_colors[:5])
-        tm.close()
-
-        custom_colors = "rgcby"
-        ax = df.plot.bar(color=custom_colors)
-        self._check_colors(ax.patches[::5], facecolors=custom_colors)
-        tm.close()
-
-        from matplotlib import cm
-
-        # Test str -> colormap functionality
-        ax = df.plot.bar(colormap="jet")
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
-        self._check_colors(ax.patches[::5], facecolors=rgba_colors)
-        tm.close()
-
-        # Test colormap functionality
-        ax = df.plot.bar(colormap=cm.jet)
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
-        self._check_colors(ax.patches[::5], facecolors=rgba_colors)
-        tm.close()
-
-        ax = df.loc[:, [0]].plot.bar(color="DodgerBlue")
-        self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"])
-        tm.close()
-
-        ax = df.plot(kind="bar", color="green")
-        self._check_colors(ax.patches[::5], facecolors=["green"] * 5)
-        tm.close()
-
-    def test_bar_user_colors(self):
-        df = pd.DataFrame(
-            {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]}
-        )
-        # This should *only* work when `y` is specified, else
-        # we use one color per column
-        ax = df.plot.bar(y="A", color=df["color"])
-        result = [p.get_facecolor() for p in ax.patches]
-        expected = [
-            (1.0, 0.0, 0.0, 1.0),
-            (0.0, 0.0, 1.0, 1.0),
-            (0.0, 0.0, 1.0, 1.0),
-            (1.0, 0.0, 0.0, 1.0),
-        ]
-        assert result == expected
-
-    @pytest.mark.slow
-    def test_bar_linewidth(self):
-        df = DataFrame(randn(5, 5))
-
-        # regular
-        ax = df.plot.bar(linewidth=2)
-        for r in ax.patches:
-            assert r.get_linewidth() == 2
-
-        # stacked
-        ax = df.plot.bar(stacked=True, linewidth=2)
-        for r in ax.patches:
-            assert r.get_linewidth() == 2
-
-        # subplots
-        axes = df.plot.bar(linewidth=2, subplots=True)
-        self._check_axes_shape(axes, axes_num=5, layout=(5, 1))
-        for ax in axes:
-            for r in ax.patches:
-                assert r.get_linewidth() == 2
-
-    @pytest.mark.slow
-    def test_bar_barwidth(self):
-        df = DataFrame(randn(5, 5))
-
-        width = 0.9
-
-        # regular
-        ax = df.plot.bar(width=width)
-        for r in ax.patches:
-            assert r.get_width() == width / len(df.columns)
-
-        # stacked
-        ax = df.plot.bar(stacked=True, width=width)
-        for r in ax.patches:
-            assert r.get_width() == width
-
-        # horizontal regular
-        ax = df.plot.barh(width=width)
-        for r in ax.patches:
-            assert r.get_height() == width / len(df.columns)
-
-        # horizontal stacked
-        ax = df.plot.barh(stacked=True, width=width)
-        for r in ax.patches:
-            assert r.get_height() == width
-
-        # subplots
-        axes = df.plot.bar(width=width, subplots=True)
-        for ax in axes:
-            for r in ax.patches:
-                assert r.get_width() == width
-
-        # horizontal subplots
-        axes = df.plot.barh(width=width, subplots=True)
-        for ax in axes:
-            for r in ax.patches:
-                assert r.get_height() == width
-
-    @pytest.mark.slow
-    def test_bar_barwidth_position(self):
-        df = DataFrame(randn(5, 5))
-        self._check_bar_alignment(
-            df, kind="bar", stacked=False, width=0.9, position=0.2
-        )
-        self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2)
-        self._check_bar_alignment(
-            df, kind="barh", stacked=False, width=0.9, position=0.2
-        )
-        self._check_bar_alignment(
-            df, kind="barh", stacked=True, width=0.9, position=0.2
-        )
-        self._check_bar_alignment(
-            df, kind="bar", subplots=True, width=0.9, position=0.2
-        )
-        self._check_bar_alignment(
-            df, kind="barh", subplots=True, width=0.9, position=0.2
-        )
-
-    @pytest.mark.slow
-    def test_bar_barwidth_position_int(self):
-        # GH 12979
-        df = DataFrame(randn(5, 5))
-
-        for w in [1, 1.0]:
-            ax = df.plot.bar(stacked=True, width=w)
-            ticks = ax.xaxis.get_ticklocs()
-            tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4]))
-            assert ax.get_xlim() == (-0.75, 4.75)
-            # check left-edge of bars
-            assert ax.patches[0].get_x() == -0.5
-            assert ax.patches[-1].get_x() == 3.5
-
-        self._check_bar_alignment(df, kind="bar", stacked=True, width=1)
-        self._check_bar_alignment(df, kind="barh", stacked=False, width=1)
-        self._check_bar_alignment(df, kind="barh", stacked=True, width=1)
-        self._check_bar_alignment(df, kind="bar", subplots=True, width=1)
-        self._check_bar_alignment(df, kind="barh", subplots=True, width=1)
-
-    @pytest.mark.slow
-    def test_bar_bottom_left(self):
-        df = DataFrame(rand(5, 5))
-        ax = df.plot.bar(stacked=False, bottom=1)
-        result = [p.get_y() for p in ax.patches]
-        assert result == [1] * 25
-
-        ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5])
-        result = [p.get_y() for p in ax.patches[:5]]
-        assert result == [-1, -2, -3, -4, -5]
-
-        ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1]))
-        result = [p.get_x() for p in ax.patches]
-        assert result == [1] * 25
-
-        ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5])
-        result = [p.get_x() for p in ax.patches[:5]]
-        assert result == [1, 2, 3, 4, 5]
-
-        axes = df.plot.bar(subplots=True, bottom=-1)
-        for ax in axes:
-            result = [p.get_y() for p in ax.patches]
-            assert result == [-1] * 5
-
-        axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1]))
-        for ax in axes:
-            result = [p.get_x() for p in ax.patches]
-            assert result == [1] * 5
-
-    @pytest.mark.slow
-    def test_bar_nan(self):
-        df = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]})
-        ax = df.plot.bar()
-        expected = [10, 0, 20, 5, 10, 20, 1, 2, 3]
-        result = [p.get_height() for p in ax.patches]
-        assert result == expected
-
-        ax = df.plot.bar(stacked=True)
-        result = [p.get_height() for p in ax.patches]
-        assert result == expected
-
-        result = [p.get_y() for p in ax.patches]
-        expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0]
-        assert result == expected
-
-    @pytest.mark.slow
-    def test_bar_categorical(self):
-        # GH 13019
-        df1 = pd.DataFrame(
-            np.random.randn(6, 5),
-            index=pd.Index(list("ABCDEF")),
-            columns=pd.Index(list("abcde")),
-        )
-        # categorical index must behave the same
-        df2 = pd.DataFrame(
-            np.random.randn(6, 5),
-            index=pd.CategoricalIndex(list("ABCDEF")),
-            columns=pd.CategoricalIndex(list("abcde")),
-        )
-
-        for df in [df1, df2]:
-            ax = df.plot.bar()
-            ticks = ax.xaxis.get_ticklocs()
-            tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
-            assert ax.get_xlim() == (-0.5, 5.5)
-            # check left-edge of bars
-            assert ax.patches[0].get_x() == -0.25
-            assert ax.patches[-1].get_x() == 5.15
-
-            ax = df.plot.bar(stacked=True)
-            tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
-            assert ax.get_xlim() == (-0.5, 5.5)
-            assert ax.patches[0].get_x() == -0.25
-            assert ax.patches[-1].get_x() == 4.75
-
-    @pytest.mark.slow
-    def test_plot_scatter(self):
-        df = DataFrame(
-            randn(6, 4),
-            index=list(string.ascii_letters[:6]),
-            columns=["x", "y", "z", "four"],
-        )
-
-        _check_plot_works(df.plot.scatter, x="x", y="y")
-        _check_plot_works(df.plot.scatter, x=1, y=2)
-
-        with pytest.raises(TypeError):
-            df.plot.scatter(x="x")
-        with pytest.raises(TypeError):
-            df.plot.scatter(y="y")
-
-        # GH 6951
-        axes = df.plot(x="x", y="y", kind="scatter", subplots=True)
-        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
-
-    def test_raise_error_on_datetime_time_data(self):
-        # GH 8113, datetime.time type is not supported by matplotlib in scatter
-        df = pd.DataFrame(np.random.randn(10), columns=["a"])
-        df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time
-        msg = "must be a string or a number, not 'datetime.time'"
-
-        with pytest.raises(TypeError, match=msg):
-            df.plot(kind="scatter", x="dtime", y="a")
-
-    def test_scatterplot_datetime_data(self):
-        # GH 30391
-        dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W")
-        vals = np.random.normal(0, 1, len(dates))
-        df = pd.DataFrame({"dates": dates, "vals": vals})
-
-        _check_plot_works(df.plot.scatter, x="dates", y="vals")
-        _check_plot_works(df.plot.scatter, x=0, y=1)
-
-    def test_scatterplot_object_data(self):
-        # GH 18755
-        df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4]))
-
-        _check_plot_works(df.plot.scatter, x="a", y="b")
-        _check_plot_works(df.plot.scatter, x=0, y=1)
-
-        df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"]))
-
-        _check_plot_works(df.plot.scatter, x="a", y="b")
-        _check_plot_works(df.plot.scatter, x=0, y=1)
-
-    @pytest.mark.slow
-    def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
-        # addressing issue #10611, to ensure colobar does not
-        # interfere with x-axis label and ticklabels with
-        # ipython inline backend.
-        random_array = np.random.random((1000, 3))
-        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
-
-        ax1 = df.plot.scatter(x="A label", y="B label")
-        ax2 = df.plot.scatter(x="A label", y="B label", c="C label")
-
-        vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()]
-        vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()]
-        assert vis1 == vis2
-
-        vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()]
-        vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()]
-        assert vis1 == vis2
-
-        assert (
-            ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible()
-        )
-
-    @pytest.mark.slow
-    def test_if_hexbin_xaxis_label_is_visible(self):
-        # addressing issue #10678, to ensure colobar does not
-        # interfere with x-axis label and ticklabels with
-        # ipython inline backend.
-        random_array = np.random.random((1000, 3))
-        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
-
-        ax = df.plot.hexbin("A label", "B label", gridsize=12)
-        assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels())
-        assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels())
-        assert ax.xaxis.get_label().get_visible()
-
-    @pytest.mark.slow
-    def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
-        import matplotlib.pyplot as plt
-
-        random_array = np.random.random((1000, 3))
-        df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"])
-
-        fig, axes = plt.subplots(1, 2)
-        df.plot.scatter("A label", "B label", c="C label", ax=axes[0])
-        df.plot.scatter("A label", "B label", c="C label", ax=axes[1])
-        plt.tight_layout()
-
-        points = np.array([ax.get_position().get_points() for ax in fig.axes])
-        axes_x_coords = points[:, :, 0]
-        parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :]
-        colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :]
-        assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all()
-
-    @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")])
-    @pytest.mark.slow
-    def test_plot_scatter_with_categorical_data(self, x, y):
-        # after fixing GH 18755, should be able to plot categorical data
-        df = pd.DataFrame(
-            {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}
-        )
-
-        _check_plot_works(df.plot.scatter, x=x, y=y)
-
-    @pytest.mark.slow
-    def test_plot_scatter_with_c(self):
-        df = DataFrame(
-            randn(6, 4),
-            index=list(string.ascii_letters[:6]),
-            columns=["x", "y", "z", "four"],
-        )
-
-        axes = [df.plot.scatter(x="x", y="y", c="z"), df.plot.scatter(x=0, y=1, c=2)]
-        for ax in axes:
-            # default to Greys
-            assert ax.collections[0].cmap.name == "Greys"
-
-            # n.b. there appears to be no public method
-            # to get the colorbar label
-            assert ax.collections[0].colorbar._label == "z"
-
-        cm = "cubehelix"
-        ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm)
-        assert ax.collections[0].cmap.name == cm
-
-        # verify turning off colorbar works
-        ax = df.plot.scatter(x="x", y="y", c="z", colorbar=False)
-        assert ax.collections[0].colorbar is None
-
-        # verify that we can still plot a solid color
-        ax = df.plot.scatter(x=0, y=1, c="red")
-        assert ax.collections[0].colorbar is None
-        self._check_colors(ax.collections, facecolors=["r"])
-
-        # Ensure that we can pass an np.array straight through to matplotlib,
-        # this functionality was accidentally removed previously.
-        # See https://github.com/pandas-dev/pandas/issues/8852 for bug report
-        #
-        # Exercise colormap path and non-colormap path as they are independent
-        #
-        df = DataFrame({"A": [1, 2], "B": [3, 4]})
-        red_rgba = [1.0, 0.0, 0.0, 1.0]
-        green_rgba = [0.0, 1.0, 0.0, 1.0]
-        rgba_array = np.array([red_rgba, green_rgba])
-        ax = df.plot.scatter(x="A", y="B", c=rgba_array)
-        # expect the face colors of the points in the non-colormap path to be
-        # identical to the values we supplied, normally we'd be on shaky ground
-        # comparing floats for equality but here we expect them to be
-        # identical.
-        tm.assert_numpy_array_equal(ax.collections[0].get_facecolor(), rgba_array)
-        # we don't test the colors of the faces in this next plot because they
-        # are dependent on the spring colormap, which may change its colors
-        # later.
-        float_array = np.array([0.0, 1.0])
-        df.plot.scatter(x="A", y="B", c=float_array, cmap="spring")
-
-    @pytest.mark.parametrize("cmap", [None, "Greys"])
-    def test_scatter_with_c_column_name_with_colors(self, cmap):
-        # https://github.com/pandas-dev/pandas/issues/34316
-        df = pd.DataFrame(
-            [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]],
-            columns=["length", "width"],
-        )
-        df["species"] = ["r", "r", "g", "g", "b"]
-        ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap)
-        assert ax.collections[0].colorbar is None
-
-    def test_plot_scatter_with_s(self):
-        # this refers to GH 32904
-        df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],)
-
-        ax = df.plot.scatter(x="a", y="b", s="c")
-        tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes())
-
-    def test_scatter_colors(self):
-        df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
-        with pytest.raises(TypeError):
-            df.plot.scatter(x="a", y="b", c="c", color="green")
-
-        default_colors = self._unpack_cycler(self.plt.rcParams)
-
-        ax = df.plot.scatter(x="a", y="b", c="c")
-        tm.assert_numpy_array_equal(
-            ax.collections[0].get_facecolor()[0],
-            np.array(self.colorconverter.to_rgba(default_colors[0])),
-        )
-
-        ax = df.plot.scatter(x="a", y="b", color="white")
-        tm.assert_numpy_array_equal(
-            ax.collections[0].get_facecolor()[0],
-            np.array([1, 1, 1, 1], dtype=np.float64),
-        )
-
-    def test_scatter_colorbar_different_cmap(self):
-        # GH 33389
-        import matplotlib.pyplot as plt
-
-        df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]})
-        df["x2"] = df["x"] + 1
-
-        fig, ax = plt.subplots()
-        df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax)
-        df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax)
-
-        assert ax.collections[0].cmap.name == "cividis"
-        assert ax.collections[1].cmap.name == "magma"
-
-    @pytest.mark.slow
-    def test_plot_bar(self):
-        df = DataFrame(
-            randn(6, 4),
-            index=list(string.ascii_letters[:6]),
-            columns=["one", "two", "three", "four"],
-        )
-
-        _check_plot_works(df.plot.bar)
-        _check_plot_works(df.plot.bar, legend=False)
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning):
-            _check_plot_works(df.plot.bar, subplots=True)
-        _check_plot_works(df.plot.bar, stacked=True)
-
-        df = DataFrame(
-            randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15)
-        )
-        _check_plot_works(df.plot.bar)
-
-        df = DataFrame({"a": [0, 1], "b": [1, 0]})
-        ax = _check_plot_works(df.plot.bar)
-        self._check_ticks_props(ax, xrot=90)
-
-        ax = df.plot.bar(rot=35, fontsize=10)
-        self._check_ticks_props(ax, xrot=35, xlabelsize=10, ylabelsize=10)
-
-        ax = _check_plot_works(df.plot.barh)
-        self._check_ticks_props(ax, yrot=0)
-
-        ax = df.plot.barh(rot=55, fontsize=11)
-        self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11)
-
-    def _check_bar_alignment(
-        self,
-        df,
-        kind="bar",
-        stacked=False,
-        subplots=False,
-        align="center",
-        width=0.5,
-        position=0.5,
-    ):
-
-        axes = df.plot(
-            kind=kind,
-            stacked=stacked,
-            subplots=subplots,
-            align=align,
-            width=width,
-            position=position,
-            grid=True,
-        )
-
-        axes = self._flatten_visible(axes)
-
-        for ax in axes:
-            if kind == "bar":
-                axis = ax.xaxis
-                ax_min, ax_max = ax.get_xlim()
-                min_edge = min(p.get_x() for p in ax.patches)
-                max_edge = max(p.get_x() + p.get_width() for p in ax.patches)
-            elif kind == "barh":
-                axis = ax.yaxis
-                ax_min, ax_max = ax.get_ylim()
-                min_edge = min(p.get_y() for p in ax.patches)
-                max_edge = max(p.get_y() + p.get_height() for p in ax.patches)
-            else:
-                raise ValueError
-
-            # GH 7498
-            # compare margins between lim and bar edges
-            tm.assert_almost_equal(ax_min, min_edge - 0.25)
-            tm.assert_almost_equal(ax_max, max_edge + 0.25)
-
-            p = ax.patches[0]
-            if kind == "bar" and (stacked is True or subplots is True):
-                edge = p.get_x()
-                center = edge + p.get_width() * position
-            elif kind == "bar" and stacked is False:
-                center = p.get_x() + p.get_width() * len(df.columns) * position
-                edge = p.get_x()
-            elif kind == "barh" and (stacked is True or subplots is True):
-                center = p.get_y() + p.get_height() * position
-                edge = p.get_y()
-            elif kind == "barh" and stacked is False:
-                center = p.get_y() + p.get_height() * len(df.columns) * position
-                edge = p.get_y()
-            else:
-                raise ValueError
-
-            # Check the ticks locates on integer
-            assert (axis.get_ticklocs() == np.arange(len(df))).all()
-
-            if align == "center":
-                # Check whether the bar locates on center
-                tm.assert_almost_equal(axis.get_ticklocs()[0], center)
-            elif align == "edge":
-                # Check whether the bar's edge starts from the tick
-                tm.assert_almost_equal(axis.get_ticklocs()[0], edge)
-            else:
-                raise ValueError
-
-        return axes
-
-    @pytest.mark.slow
-    def test_bar_stacked_center(self):
-        # GH2157
-        df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5))
-        self._check_bar_alignment(df, kind="bar", stacked=True)
-        self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9)
-        self._check_bar_alignment(df, kind="barh", stacked=True)
-        self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9)
-
-    @pytest.mark.slow
-    def test_bar_center(self):
-        df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5))
-        self._check_bar_alignment(df, kind="bar", stacked=False)
-        self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9)
-        self._check_bar_alignment(df, kind="barh", stacked=False)
-        self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9)
-
-    @pytest.mark.slow
-    def test_bar_subplots_center(self):
-        df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5))
-        self._check_bar_alignment(df, kind="bar", subplots=True)
-        self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9)
-        self._check_bar_alignment(df, kind="barh", subplots=True)
-        self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9)
-
-    @pytest.mark.slow
-    def test_bar_align_single_column(self):
-        df = DataFrame(randn(5))
-        self._check_bar_alignment(df, kind="bar", stacked=False)
-        self._check_bar_alignment(df, kind="bar", stacked=True)
-        self._check_bar_alignment(df, kind="barh", stacked=False)
-        self._check_bar_alignment(df, kind="barh", stacked=True)
-        self._check_bar_alignment(df, kind="bar", subplots=True)
-        self._check_bar_alignment(df, kind="barh", subplots=True)
-
-    @pytest.mark.slow
-    def test_bar_edge(self):
-        df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5))
-
-        self._check_bar_alignment(df, kind="bar", stacked=True, align="edge")
-        self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge")
-        self._check_bar_alignment(df, kind="barh", stacked=True, align="edge")
-        self._check_bar_alignment(
-            df, kind="barh", stacked=True, width=0.9, align="edge"
-        )
-
-        self._check_bar_alignment(df, kind="bar", stacked=False, align="edge")
-        self._check_bar_alignment(
-            df, kind="bar", stacked=False, width=0.9, align="edge"
-        )
-        self._check_bar_alignment(df, kind="barh", stacked=False, align="edge")
-        self._check_bar_alignment(
-            df, kind="barh", stacked=False, width=0.9, align="edge"
-        )
-
-        self._check_bar_alignment(df, kind="bar", subplots=True, align="edge")
-        self._check_bar_alignment(
-            df, kind="bar", subplots=True, width=0.9, align="edge"
-        )
-        self._check_bar_alignment(df, kind="barh", subplots=True, align="edge")
-        self._check_bar_alignment(
-            df, kind="barh", subplots=True, width=0.9, align="edge"
-        )
-
-    @pytest.mark.slow
-    def test_bar_log_no_subplots(self):
-        # GH3254, GH3298 matplotlib/matplotlib#1882, #1892
-        # regressions in 1.2.1
-        expected = np.array([0.1, 1.0, 10.0, 100])
-
-        # no subplots
-        df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5))
-        ax = df.plot.bar(grid=True, log=True)
-        tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
-
-    @pytest.mark.slow
-    def test_bar_log_subplots(self):
-        expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4])
-
-        ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar(
-            log=True, subplots=True
-        )
-
-        tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected)
-        tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected)
-
-    @pytest.mark.slow
-    def test_boxplot(self):
-        df = self.hist_df
-        series = df["height"]
-        numeric_cols = df._get_numeric_data().columns
-        labels = [pprint_thing(c) for c in numeric_cols]
-
-        ax = _check_plot_works(df.plot.box)
-        self._check_text_labels(ax.get_xticklabels(), labels)
-        tm.assert_numpy_array_equal(
-            ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1)
-        )
-        assert len(ax.lines) == self.bp_n_objects * len(numeric_cols)
-
-        axes = series.plot.box(rot=40)
-        self._check_ticks_props(axes, xrot=40, yrot=0)
-        tm.close()
-
-        ax = _check_plot_works(series.plot.box)
-
-        positions = np.array([1, 6, 7])
-        ax = df.plot.box(positions=positions)
-        numeric_cols = df._get_numeric_data().columns
-        labels = [pprint_thing(c) for c in numeric_cols]
-        self._check_text_labels(ax.get_xticklabels(), labels)
-        tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions)
-        assert len(ax.lines) == self.bp_n_objects * len(numeric_cols)
-
-    @pytest.mark.slow
-    def test_boxplot_vertical(self):
-        df = self.hist_df
-        numeric_cols = df._get_numeric_data().columns
-        labels = [pprint_thing(c) for c in numeric_cols]
-
-        # if horizontal, yticklabels are rotated
-        ax = df.plot.box(rot=50, fontsize=8, vert=False)
-        self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8)
-        self._check_text_labels(ax.get_yticklabels(), labels)
-        assert len(ax.lines) == self.bp_n_objects * len(numeric_cols)
-
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot.box, subplots=True, vert=False, logx=True)
-        self._check_axes_shape(axes, axes_num=3, layout=(1, 3))
-        self._check_ax_scales(axes, xaxis="log")
-        for ax, label in zip(axes, labels):
-            self._check_text_labels(ax.get_yticklabels(), [label])
-            assert len(ax.lines) == self.bp_n_objects
-
-        positions = np.array([3, 2, 8])
-        ax = df.plot.box(positions=positions, vert=False)
-        self._check_text_labels(ax.get_yticklabels(), labels)
-        tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions)
-        assert len(ax.lines) == self.bp_n_objects * len(numeric_cols)
-
-    @pytest.mark.slow
-    def test_boxplot_return_type(self):
-        df = DataFrame(
-            randn(6, 4),
-            index=list(string.ascii_letters[:6]),
-            columns=["one", "two", "three", "four"],
-        )
-        with pytest.raises(ValueError):
-            df.plot.box(return_type="NOTATYPE")
-
-        result = df.plot.box(return_type="dict")
-        self._check_box_return_type(result, "dict")
-
-        result = df.plot.box(return_type="axes")
-        self._check_box_return_type(result, "axes")
-
-        result = df.plot.box()  # default axes
-        self._check_box_return_type(result, "axes")
-
-        result = df.plot.box(return_type="both")
-        self._check_box_return_type(result, "both")
-
-    @pytest.mark.slow
-    def test_boxplot_subplots_return_type(self):
-        df = self.hist_df
-
-        # normal style: return_type=None
-        result = df.plot.box(subplots=True)
-        assert isinstance(result, Series)
-        self._check_box_return_type(
-            result, None, expected_keys=["height", "weight", "category"]
-        )
-
-        for t in ["dict", "axes", "both"]:
-            returned = df.plot.box(return_type=t, subplots=True)
-            self._check_box_return_type(
-                returned,
-                t,
-                expected_keys=["height", "weight", "category"],
-                check_ax_title=False,
-            )
-
-    @pytest.mark.slow
-    @td.skip_if_no_scipy
-    def test_kde_df(self):
-        df = DataFrame(randn(100, 4))
-        ax = _check_plot_works(df.plot, kind="kde")
-        expected = [pprint_thing(c) for c in df.columns]
-        self._check_legend_labels(ax, labels=expected)
-        self._check_ticks_props(ax, xrot=0)
-
-        ax = df.plot(kind="kde", rot=20, fontsize=5)
-        self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5)
-
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot, kind="kde", subplots=True)
-        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
-
-        axes = df.plot(kind="kde", logy=True, subplots=True)
-        self._check_ax_scales(axes, yaxis="log")
-
-    @pytest.mark.slow
-    @td.skip_if_no_scipy
-    def test_kde_missing_vals(self):
-        df = DataFrame(np.random.uniform(size=(100, 4)))
-        df.loc[0, 0] = np.nan
-        _check_plot_works(df.plot, kind="kde")
-
-    @pytest.mark.slow
-    def test_hist_df(self):
-        from matplotlib.patches import Rectangle
-
-        df = DataFrame(randn(100, 4))
-        series = df[0]
-
-        ax = _check_plot_works(df.plot.hist)
-        expected = [pprint_thing(c) for c in df.columns]
-        self._check_legend_labels(ax, labels=expected)
-
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot.hist, subplots=True, logy=True)
-        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
-        self._check_ax_scales(axes, yaxis="log")
-
-        axes = series.plot.hist(rot=40)
-        self._check_ticks_props(axes, xrot=40, yrot=0)
-        tm.close()
-
-        ax = series.plot.hist(cumulative=True, bins=4, density=True)
-        # height of last bin (index 5) must be 1.0
-        rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
-        tm.assert_almost_equal(rects[-1].get_height(), 1.0)
-        tm.close()
-
-        ax = series.plot.hist(cumulative=True, bins=4)
-        rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
-
-        tm.assert_almost_equal(rects[-2].get_height(), 100.0)
-        tm.close()
-
-        # if horizontal, yticklabels are rotated
-        axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal")
-        self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8)
-
-    @pytest.mark.parametrize(
-        "weights", [0.1 * np.ones(shape=(100,)), 0.1 * np.ones(shape=(100, 2))]
-    )
-    def test_hist_weights(self, weights):
-        # GH 33173
-        np.random.seed(0)
-        df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100,))))
-
-        ax1 = _check_plot_works(df.plot, kind="hist", weights=weights)
-        ax2 = _check_plot_works(df.plot, kind="hist")
-
-        patch_height_with_weights = [patch.get_height() for patch in ax1.patches]
-
-        # original heights with no weights, and we manually multiply with example
-        # weights, so after multiplication, they should be almost same
-        expected_patch_height = [0.1 * patch.get_height() for patch in ax2.patches]
-
-        tm.assert_almost_equal(patch_height_with_weights, expected_patch_height)
-
-    def _check_box_coord(
-        self,
-        patches,
-        expected_y=None,
-        expected_h=None,
-        expected_x=None,
-        expected_w=None,
-    ):
-        result_y = np.array([p.get_y() for p in patches])
-        result_height = np.array([p.get_height() for p in patches])
-        result_x = np.array([p.get_x() for p in patches])
-        result_width = np.array([p.get_width() for p in patches])
-        # dtype is depending on above values, no need to check
-
-        if expected_y is not None:
-            tm.assert_numpy_array_equal(result_y, expected_y, check_dtype=False)
-        if expected_h is not None:
-            tm.assert_numpy_array_equal(result_height, expected_h, check_dtype=False)
-        if expected_x is not None:
-            tm.assert_numpy_array_equal(result_x, expected_x, check_dtype=False)
-        if expected_w is not None:
-            tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False)
-
-    @pytest.mark.slow
-    def test_hist_df_coord(self):
-        normal_df = DataFrame(
-            {
-                "A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])),
-                "B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])),
-                "C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])),
-            },
-            columns=["A", "B", "C"],
-        )
-
-        nan_df = DataFrame(
-            {
-                "A": np.repeat(
-                    np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6])
-                ),
-                "B": np.repeat(
-                    np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8])
-                ),
-                "C": np.repeat(
-                    np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10])
-                ),
-            },
-            columns=["A", "B", "C"],
-        )
-
-        for df in [normal_df, nan_df]:
-            ax = df.plot.hist(bins=5)
-            self._check_box_coord(
-                ax.patches[:5],
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([10, 9, 8, 7, 6]),
-            )
-            self._check_box_coord(
-                ax.patches[5:10],
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([8, 8, 8, 8, 8]),
-            )
-            self._check_box_coord(
-                ax.patches[10:],
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([6, 7, 8, 9, 10]),
-            )
-
-            ax = df.plot.hist(bins=5, stacked=True)
-            self._check_box_coord(
-                ax.patches[:5],
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([10, 9, 8, 7, 6]),
-            )
-            self._check_box_coord(
-                ax.patches[5:10],
-                expected_y=np.array([10, 9, 8, 7, 6]),
-                expected_h=np.array([8, 8, 8, 8, 8]),
-            )
-            self._check_box_coord(
-                ax.patches[10:],
-                expected_y=np.array([18, 17, 16, 15, 14]),
-                expected_h=np.array([6, 7, 8, 9, 10]),
-            )
-
-            axes = df.plot.hist(bins=5, stacked=True, subplots=True)
-            self._check_box_coord(
-                axes[0].patches,
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([10, 9, 8, 7, 6]),
-            )
-            self._check_box_coord(
-                axes[1].patches,
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([8, 8, 8, 8, 8]),
-            )
-            self._check_box_coord(
-                axes[2].patches,
-                expected_y=np.array([0, 0, 0, 0, 0]),
-                expected_h=np.array([6, 7, 8, 9, 10]),
-            )
-
-            # horizontal
-            ax = df.plot.hist(bins=5, orientation="horizontal")
-            self._check_box_coord(
-                ax.patches[:5],
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([10, 9, 8, 7, 6]),
-            )
-            self._check_box_coord(
-                ax.patches[5:10],
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([8, 8, 8, 8, 8]),
-            )
-            self._check_box_coord(
-                ax.patches[10:],
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([6, 7, 8, 9, 10]),
-            )
-
-            ax = df.plot.hist(bins=5, stacked=True, orientation="horizontal")
-            self._check_box_coord(
-                ax.patches[:5],
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([10, 9, 8, 7, 6]),
-            )
-            self._check_box_coord(
-                ax.patches[5:10],
-                expected_x=np.array([10, 9, 8, 7, 6]),
-                expected_w=np.array([8, 8, 8, 8, 8]),
-            )
-            self._check_box_coord(
-                ax.patches[10:],
-                expected_x=np.array([18, 17, 16, 15, 14]),
-                expected_w=np.array([6, 7, 8, 9, 10]),
-            )
-
-            axes = df.plot.hist(
-                bins=5, stacked=True, subplots=True, orientation="horizontal"
-            )
-            self._check_box_coord(
-                axes[0].patches,
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([10, 9, 8, 7, 6]),
-            )
-            self._check_box_coord(
-                axes[1].patches,
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([8, 8, 8, 8, 8]),
-            )
-            self._check_box_coord(
-                axes[2].patches,
-                expected_x=np.array([0, 0, 0, 0, 0]),
-                expected_w=np.array([6, 7, 8, 9, 10]),
-            )
-
-    @pytest.mark.slow
-    def test_plot_int_columns(self):
-        df = DataFrame(randn(100, 4)).cumsum()
-        _check_plot_works(df.plot, legend=True)
-
-    @pytest.mark.slow
-    def test_df_legend_labels(self):
-        kinds = ["line", "bar", "barh", "kde", "area", "hist"]
-        df = DataFrame(rand(3, 3), columns=["a", "b", "c"])
-        df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"])
-        df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"])
-        df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"])
-
-        for kind in kinds:
-
-            ax = df.plot(kind=kind, legend=True)
-            self._check_legend_labels(ax, labels=df.columns)
-
-            ax = df2.plot(kind=kind, legend=False, ax=ax)
-            self._check_legend_labels(ax, labels=df.columns)
-
-            ax = df3.plot(kind=kind, legend=True, ax=ax)
-            self._check_legend_labels(ax, labels=df.columns.union(df3.columns))
-
-            ax = df4.plot(kind=kind, legend="reverse", ax=ax)
-            expected = list(df.columns.union(df3.columns)) + list(reversed(df4.columns))
-            self._check_legend_labels(ax, labels=expected)
-
-        # Secondary Y
-        ax = df.plot(legend=True, secondary_y="b")
-        self._check_legend_labels(ax, labels=["a", "b (right)", "c"])
-        ax = df2.plot(legend=False, ax=ax)
-        self._check_legend_labels(ax, labels=["a", "b (right)", "c"])
-        ax = df3.plot(kind="bar", legend=True, secondary_y="h", ax=ax)
-        self._check_legend_labels(
-            ax, labels=["a", "b (right)", "c", "g", "h (right)", "i"]
-        )
-
-        # Time Series
-        ind = date_range("1/1/2014", periods=3)
-        df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind)
-        df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind)
-        df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind)
-        ax = df.plot(legend=True, secondary_y="b")
-        self._check_legend_labels(ax, labels=["a", "b (right)", "c"])
-        ax = df2.plot(legend=False, ax=ax)
-        self._check_legend_labels(ax, labels=["a", "b (right)", "c"])
-        ax = df3.plot(legend=True, ax=ax)
-        self._check_legend_labels(ax, labels=["a", "b (right)", "c", "g", "h", "i"])
-
-        # scatter
-        ax = df.plot.scatter(x="a", y="b", label="data1")
-        self._check_legend_labels(ax, labels=["data1"])
-        ax = df2.plot.scatter(x="d", y="e", legend=False, label="data2", ax=ax)
-        self._check_legend_labels(ax, labels=["data1"])
-        ax = df3.plot.scatter(x="g", y="h", label="data3", ax=ax)
-        self._check_legend_labels(ax, labels=["data1", "data3"])
-
-        # ensure label args pass through and
-        # index name does not mutate
-        # column names don't mutate
-        df5 = df.set_index("a")
-        ax = df5.plot(y="b")
-        self._check_legend_labels(ax, labels=["b"])
-        ax = df5.plot(y="b", label="LABEL_b")
-        self._check_legend_labels(ax, labels=["LABEL_b"])
-        self._check_text_labels(ax.xaxis.get_label(), "a")
-        ax = df5.plot(y="c", label="LABEL_c", ax=ax)
-        self._check_legend_labels(ax, labels=["LABEL_b", "LABEL_c"])
-        assert df5.columns.tolist() == ["b", "c"]
-
-    def test_missing_marker_multi_plots_on_same_ax(self):
-        # GH 18222
-        df = pd.DataFrame(
-            data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]
-        )
-        fig, ax = self.plt.subplots(nrows=1, ncols=3)
-        # Left plot
-        df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0])
-        df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[0])
-        df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[0])
-        self._check_legend_labels(ax[0], labels=["r", "g", "b"])
-        self._check_legend_marker(ax[0], expected_markers=["o", "x", "o"])
-        # Center plot
-        df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[1])
-        df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[1])
-        df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[1])
-        self._check_legend_labels(ax[1], labels=["b", "r", "g"])
-        self._check_legend_marker(ax[1], expected_markers=["o", "o", "x"])
-        # Right plot
-        df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[2])
-        df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[2])
-        df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[2])
-        self._check_legend_labels(ax[2], labels=["g", "b", "r"])
-        self._check_legend_marker(ax[2], expected_markers=["x", "o", "o"])
-
-    def test_legend_name(self):
-        multi = DataFrame(
-            randn(4, 4),
-            columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])],
-        )
-        multi.columns.names = ["group", "individual"]
-
-        ax = multi.plot()
-        leg_title = ax.legend_.get_title()
-        self._check_text_labels(leg_title, "group,individual")
-
-        df = DataFrame(randn(5, 5))
-        ax = df.plot(legend=True, ax=ax)
-        leg_title = ax.legend_.get_title()
-        self._check_text_labels(leg_title, "group,individual")
-
-        df.columns.name = "new"
-        ax = df.plot(legend=False, ax=ax)
-        leg_title = ax.legend_.get_title()
-        self._check_text_labels(leg_title, "group,individual")
-
-        ax = df.plot(legend=True, ax=ax)
-        leg_title = ax.legend_.get_title()
-        self._check_text_labels(leg_title, "new")
-
-    @pytest.mark.slow
-    def test_no_legend(self):
-        kinds = ["line", "bar", "barh", "kde", "area", "hist"]
-        df = DataFrame(rand(3, 3), columns=["a", "b", "c"])
-
-        for kind in kinds:
-
-            ax = df.plot(kind=kind, legend=False)
-            self._check_legend_labels(ax, visible=False)
-
-    @pytest.mark.slow
-    def test_style_by_column(self):
-        import matplotlib.pyplot as plt
-
-        fig = plt.gcf()
-
-        df = DataFrame(randn(100, 3))
-        for markers in [
-            {0: "^", 1: "+", 2: "o"},
-            {0: "^", 1: "+"},
-            ["^", "+", "o"],
-            ["^", "+"],
-        ]:
-            fig.clf()
-            fig.add_subplot(111)
-            ax = df.plot(style=markers)
-            for i, l in enumerate(ax.get_lines()[: len(markers)]):
-                assert l.get_marker() == markers[i]
-
-    @pytest.mark.slow
-    def test_line_label_none(self):
-        s = Series([1, 2])
-        ax = s.plot()
-        assert ax.get_legend() is None
-
-        ax = s.plot(legend=True)
-        assert ax.get_legend().get_texts()[0].get_text() == "None"
-
-    @pytest.mark.slow
-    def test_line_colors(self):
-        from matplotlib import cm
-
-        custom_colors = "rgcby"
-        df = DataFrame(randn(5, 5))
-
-        ax = df.plot(color=custom_colors)
-        self._check_colors(ax.get_lines(), linecolors=custom_colors)
-
-        tm.close()
-
-        ax2 = df.plot(color=custom_colors)
-        lines2 = ax2.get_lines()
-
-        for l1, l2 in zip(ax.get_lines(), lines2):
-            assert l1.get_color() == l2.get_color()
-
-        tm.close()
-
-        ax = df.plot(colormap="jet")
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        self._check_colors(ax.get_lines(), linecolors=rgba_colors)
-        tm.close()
-
-        ax = df.plot(colormap=cm.jet)
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        self._check_colors(ax.get_lines(), linecolors=rgba_colors)
-        tm.close()
-
-        # make color a list if plotting one column frame
-        # handles cases like df.plot(color='DodgerBlue')
-        ax = df.loc[:, [0]].plot(color="DodgerBlue")
-        self._check_colors(ax.lines, linecolors=["DodgerBlue"])
-
-        ax = df.plot(color="red")
-        self._check_colors(ax.get_lines(), linecolors=["red"] * 5)
-        tm.close()
-
-        # GH 10299
-        custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"]
-        ax = df.plot(color=custom_colors)
-        self._check_colors(ax.get_lines(), linecolors=custom_colors)
-        tm.close()
-
-    @pytest.mark.slow
-    def test_dont_modify_colors(self):
-        colors = ["r", "g", "b"]
-        pd.DataFrame(np.random.rand(10, 2)).plot(color=colors)
-        assert len(colors) == 3
-
-    @pytest.mark.slow
-    def test_line_colors_and_styles_subplots(self):
-        # GH 9894
-        from matplotlib import cm
-
-        default_colors = self._unpack_cycler(self.plt.rcParams)
-
-        df = DataFrame(randn(5, 5))
-
-        axes = df.plot(subplots=True)
-        for ax, c in zip(axes, list(default_colors)):
-            c = [c]
-            self._check_colors(ax.get_lines(), linecolors=c)
-        tm.close()
-
-        # single color char
-        axes = df.plot(subplots=True, color="k")
-        for ax in axes:
-            self._check_colors(ax.get_lines(), linecolors=["k"])
-        tm.close()
-
-        # single color str
-        axes = df.plot(subplots=True, color="green")
-        for ax in axes:
-            self._check_colors(ax.get_lines(), linecolors=["green"])
-        tm.close()
-
-        custom_colors = "rgcby"
-        axes = df.plot(color=custom_colors, subplots=True)
-        for ax, c in zip(axes, list(custom_colors)):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-        axes = df.plot(color=list(custom_colors), subplots=True)
-        for ax, c in zip(axes, list(custom_colors)):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-        # GH 10299
-        custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"]
-        axes = df.plot(color=custom_colors, subplots=True)
-        for ax, c in zip(axes, list(custom_colors)):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        for cmap in ["jet", cm.jet]:
-            axes = df.plot(colormap=cmap, subplots=True)
-            for ax, c in zip(axes, rgba_colors):
-                self._check_colors(ax.get_lines(), linecolors=[c])
-            tm.close()
-
-        # make color a list if plotting one column frame
-        # handles cases like df.plot(color='DodgerBlue')
-        axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True)
-        self._check_colors(axes[0].lines, linecolors=["DodgerBlue"])
-
-        # single character style
-        axes = df.plot(style="r", subplots=True)
-        for ax in axes:
-            self._check_colors(ax.get_lines(), linecolors=["r"])
-        tm.close()
-
-        # list of styles
-        styles = list("rgcby")
-        axes = df.plot(style=styles, subplots=True)
-        for ax, c in zip(axes, styles):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-    @pytest.mark.slow
-    def test_area_colors(self):
-        from matplotlib import cm
-        from matplotlib.collections import PolyCollection
-
-        custom_colors = "rgcby"
-        df = DataFrame(rand(5, 5))
-
-        ax = df.plot.area(color=custom_colors)
-        self._check_colors(ax.get_lines(), linecolors=custom_colors)
-        poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
-        self._check_colors(poly, facecolors=custom_colors)
-
-        handles, labels = ax.get_legend_handles_labels()
-        self._check_colors(handles, facecolors=custom_colors)
-
-        for h in handles:
-            assert h.get_alpha() is None
-        tm.close()
-
-        ax = df.plot.area(colormap="jet")
-        jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        self._check_colors(ax.get_lines(), linecolors=jet_colors)
-        poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
-        self._check_colors(poly, facecolors=jet_colors)
-
-        handles, labels = ax.get_legend_handles_labels()
-        self._check_colors(handles, facecolors=jet_colors)
-        for h in handles:
-            assert h.get_alpha() is None
-        tm.close()
-
-        # When stacked=False, alpha is set to 0.5
-        ax = df.plot.area(colormap=cm.jet, stacked=False)
-        self._check_colors(ax.get_lines(), linecolors=jet_colors)
-        poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
-        jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors]
-        self._check_colors(poly, facecolors=jet_with_alpha)
-
-        handles, labels = ax.get_legend_handles_labels()
-        linecolors = jet_with_alpha
-        self._check_colors(handles[: len(jet_colors)], linecolors=linecolors)
-        for h in handles:
-            assert h.get_alpha() == 0.5
-
-    @pytest.mark.slow
-    def test_hist_colors(self):
-        default_colors = self._unpack_cycler(self.plt.rcParams)
-
-        df = DataFrame(randn(5, 5))
-        ax = df.plot.hist()
-        self._check_colors(ax.patches[::10], facecolors=default_colors[:5])
-        tm.close()
-
-        custom_colors = "rgcby"
-        ax = df.plot.hist(color=custom_colors)
-        self._check_colors(ax.patches[::10], facecolors=custom_colors)
-        tm.close()
-
-        from matplotlib import cm
-
-        # Test str -> colormap functionality
-        ax = df.plot.hist(colormap="jet")
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
-        self._check_colors(ax.patches[::10], facecolors=rgba_colors)
-        tm.close()
-
-        # Test colormap functionality
-        ax = df.plot.hist(colormap=cm.jet)
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
-        self._check_colors(ax.patches[::10], facecolors=rgba_colors)
-        tm.close()
-
-        ax = df.loc[:, [0]].plot.hist(color="DodgerBlue")
-        self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"])
-
-        ax = df.plot(kind="hist", color="green")
-        self._check_colors(ax.patches[::10], facecolors=["green"] * 5)
-        tm.close()
-
-    @pytest.mark.slow
-    @td.skip_if_no_scipy
-    def test_kde_colors(self):
-        from matplotlib import cm
-
-        custom_colors = "rgcby"
-        df = DataFrame(rand(5, 5))
-
-        ax = df.plot.kde(color=custom_colors)
-        self._check_colors(ax.get_lines(), linecolors=custom_colors)
-        tm.close()
-
-        ax = df.plot.kde(colormap="jet")
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        self._check_colors(ax.get_lines(), linecolors=rgba_colors)
-        tm.close()
-
-        ax = df.plot.kde(colormap=cm.jet)
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        self._check_colors(ax.get_lines(), linecolors=rgba_colors)
-
-    @pytest.mark.slow
-    @td.skip_if_no_scipy
-    def test_kde_colors_and_styles_subplots(self):
-        from matplotlib import cm
-
-        default_colors = self._unpack_cycler(self.plt.rcParams)
-
-        df = DataFrame(randn(5, 5))
-
-        axes = df.plot(kind="kde", subplots=True)
-        for ax, c in zip(axes, list(default_colors)):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-        # single color char
-        axes = df.plot(kind="kde", color="k", subplots=True)
-        for ax in axes:
-            self._check_colors(ax.get_lines(), linecolors=["k"])
-        tm.close()
-
-        # single color str
-        axes = df.plot(kind="kde", color="red", subplots=True)
-        for ax in axes:
-            self._check_colors(ax.get_lines(), linecolors=["red"])
-        tm.close()
-
-        custom_colors = "rgcby"
-        axes = df.plot(kind="kde", color=custom_colors, subplots=True)
-        for ax, c in zip(axes, list(custom_colors)):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
-        for cmap in ["jet", cm.jet]:
-            axes = df.plot(kind="kde", colormap=cmap, subplots=True)
-            for ax, c in zip(axes, rgba_colors):
-                self._check_colors(ax.get_lines(), linecolors=[c])
-            tm.close()
-
-        # make color a list if plotting one column frame
-        # handles cases like df.plot(color='DodgerBlue')
-        axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True)
-        self._check_colors(axes[0].lines, linecolors=["DodgerBlue"])
-
-        # single character style
-        axes = df.plot(kind="kde", style="r", subplots=True)
-        for ax in axes:
-            self._check_colors(ax.get_lines(), linecolors=["r"])
-        tm.close()
-
-        # list of styles
-        styles = list("rgcby")
-        axes = df.plot(kind="kde", style=styles, subplots=True)
-        for ax, c in zip(axes, styles):
-            self._check_colors(ax.get_lines(), linecolors=[c])
-        tm.close()
-
-    @pytest.mark.slow
-    def test_boxplot_colors(self):
-        def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None):
-            # TODO: outside this func?
-            if fliers_c is None:
-                fliers_c = "k"
-            self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"]))
-            self._check_colors(
-                bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"])
-            )
-            self._check_colors(
-                bp["medians"], linecolors=[medians_c] * len(bp["medians"])
-            )
-            self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"]))
-            self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"]))
-
-        default_colors = self._unpack_cycler(self.plt.rcParams)
-
-        df = DataFrame(randn(5, 5))
-        bp = df.plot.box(return_type="dict")
-        _check_colors(bp, default_colors[0], default_colors[0], default_colors[2])
-        tm.close()
-
-        dict_colors = dict(
-            boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456"
-        )
-        bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict")
-        _check_colors(
-            bp,
-            dict_colors["boxes"],
-            dict_colors["whiskers"],
-            dict_colors["medians"],
-            dict_colors["caps"],
-            "r",
-        )
-        tm.close()
-
-        # partial colors
-        dict_colors = dict(whiskers="c", medians="m")
-        bp = df.plot.box(color=dict_colors, return_type="dict")
-        _check_colors(bp, default_colors[0], "c", "m")
-        tm.close()
-
-        from matplotlib import cm
-
-        # Test str -> colormap functionality
-        bp = df.plot.box(colormap="jet", return_type="dict")
-        jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)]
-        _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2])
-        tm.close()
-
-        # Test colormap functionality
-        bp = df.plot.box(colormap=cm.jet, return_type="dict")
-        _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2])
-        tm.close()
-
-        # string color is applied to all artists except fliers
-        bp = df.plot.box(color="DodgerBlue", return_type="dict")
-        _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue")
-
-        # tuple is also applied to all artists except fliers
-        bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict")
-        _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456")
-
-        with pytest.raises(ValueError):
-            # Color contains invalid key results in ValueError
-            df.plot.box(color=dict(boxes="red", xxxx="blue"))
-
-    @pytest.mark.parametrize(
-        "props, expected",
-        [
-            ("boxprops", "boxes"),
-            ("whiskerprops", "whiskers"),
-            ("capprops", "caps"),
-            ("medianprops", "medians"),
-        ],
-    )
-    def test_specified_props_kwd_plot_box(self, props, expected):
-        # GH 30346
-        df = DataFrame({k: np.random.random(100) for k in "ABC"})
-        kwd = {props: dict(color="C1")}
-        result = df.plot.box(return_type="dict", **kwd)
-
-        assert result[expected][0].get_color() == "C1"
-
-    def test_default_color_cycle(self):
-        import matplotlib.pyplot as plt
-        import cycler
-
-        colors = list("rgbk")
-        plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors)
-
-        df = DataFrame(randn(5, 3))
-        ax = df.plot()
-
-        expected = self._unpack_cycler(plt.rcParams)[:3]
-        self._check_colors(ax.get_lines(), linecolors=expected)
-
-    def test_unordered_ts(self):
-        df = DataFrame(
-            np.array([3.0, 2.0, 1.0]),
-            index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)],
-            columns=["test"],
-        )
-        ax = df.plot()
-        xticks = ax.lines[0].get_xdata()
-        assert xticks[0] < xticks[1]
-        ydata = ax.lines[0].get_ydata()
-        tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0]))
-
-    @td.skip_if_no_scipy
-    def test_kind_both_ways(self):
-        df = DataFrame({"x": [1, 2, 3]})
-        for kind in plotting.PlotAccessor._common_kinds:
-
-            df.plot(kind=kind)
-            getattr(df.plot, kind)()
-        for kind in ["scatter", "hexbin"]:
-            df.plot("x", "x", kind=kind)
-            getattr(df.plot, kind)("x", "x")
-
-    def test_all_invalid_plot_data(self):
-        df = DataFrame(list("abcd"))
-        for kind in plotting.PlotAccessor._common_kinds:
-
-            msg = "no numeric data to plot"
-            with pytest.raises(TypeError, match=msg):
-                df.plot(kind=kind)
-
-    @pytest.mark.slow
-    def test_partially_invalid_plot_data(self):
-        with tm.RNGContext(42):
-            df = DataFrame(randn(10, 2), dtype=object)
-            df[np.random.rand(df.shape[0]) > 0.5] = "a"
-            for kind in plotting.PlotAccessor._common_kinds:
-
-                msg = "no numeric data to plot"
-                with pytest.raises(TypeError, match=msg):
-                    df.plot(kind=kind)
-
-        with tm.RNGContext(42):
-            # area plot doesn't support positive/negative mixed data
-            kinds = ["area"]
-            df = DataFrame(rand(10, 2), dtype=object)
-            df[np.random.rand(df.shape[0]) > 0.5] = "a"
-            for kind in kinds:
-                with pytest.raises(TypeError):
-                    df.plot(kind=kind)
-
-    def test_invalid_kind(self):
-        df = DataFrame(randn(10, 2))
-        with pytest.raises(ValueError):
-            df.plot(kind="aasdf")
-
-    @pytest.mark.parametrize(
-        "x,y,lbl",
-        [
-            (["B", "C"], "A", "a"),
-            (["A"], ["B", "C"], ["b", "c"]),
-            ("A", ["B", "C"], "badlabel"),
-        ],
-    )
-    def test_invalid_xy_args(self, x, y, lbl):
-        # GH 18671, 19699 allows y to be list-like but not x
-        df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
-        with pytest.raises(ValueError):
-            df.plot(x=x, y=y, label=lbl)
-
-    @pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")])
-    def test_invalid_xy_args_dup_cols(self, x, y):
-        # GH 18671, 19699 allows y to be list-like but not x
-        df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB"))
-        with pytest.raises(ValueError):
-            df.plot(x=x, y=y)
-
-    @pytest.mark.parametrize(
-        "x,y,lbl,colors",
-        [
-            ("A", ["B"], ["b"], ["red"]),
-            ("A", ["B", "C"], ["b", "c"], ["red", "blue"]),
-            (0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]),
-        ],
-    )
-    def test_y_listlike(self, x, y, lbl, colors):
-        # GH 19699: tests list-like y and verifies lbls & colors
-        df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
-        _check_plot_works(df.plot, x="A", y=y, label=lbl)
-
-        ax = df.plot(x=x, y=y, label=lbl, color=colors)
-        assert len(ax.lines) == len(y)
-        self._check_colors(ax.get_lines(), linecolors=colors)
-
-    @pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])])
-    def test_xy_args_integer(self, x, y, colnames):
-        # GH 20056: tests integer args for xy and checks col names
-        df = DataFrame({"A": [1, 2], "B": [3, 4]})
-        df.columns = colnames
-        _check_plot_works(df.plot, x=x, y=y)
-
-    @pytest.mark.slow
-    def test_hexbin_basic(self):
-        df = self.hexbin_df
-
-        ax = df.plot.hexbin(x="A", y="B", gridsize=10)
-        # TODO: need better way to test. This just does existence.
-        assert len(ax.collections) == 1
-
-        # GH 6951
-        axes = df.plot.hexbin(x="A", y="B", subplots=True)
-        # hexbin should have 2 axes in the figure, 1 for plotting and another
-        # is colorbar
-        assert len(axes[0].figure.axes) == 2
-        # return value is single axes
-        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
-
-    @pytest.mark.slow
-    def test_hexbin_with_c(self):
-        df = self.hexbin_df
-
-        ax = df.plot.hexbin(x="A", y="B", C="C")
-        assert len(ax.collections) == 1
-
-        ax = df.plot.hexbin(x="A", y="B", C="C", reduce_C_function=np.std)
-        assert len(ax.collections) == 1
-
-    @pytest.mark.slow
-    def test_hexbin_cmap(self):
-        df = self.hexbin_df
-
-        # Default to BuGn
-        ax = df.plot.hexbin(x="A", y="B")
-        assert ax.collections[0].cmap.name == "BuGn"
-
-        cm = "cubehelix"
-        ax = df.plot.hexbin(x="A", y="B", colormap=cm)
-        assert ax.collections[0].cmap.name == cm
-
-    @pytest.mark.slow
-    def test_no_color_bar(self):
-        df = self.hexbin_df
-
-        ax = df.plot.hexbin(x="A", y="B", colorbar=None)
-        assert ax.collections[0].colorbar is None
-
-    @pytest.mark.slow
-    def test_allow_cmap(self):
-        df = self.hexbin_df
-
-        ax = df.plot.hexbin(x="A", y="B", cmap="YlGn")
-        assert ax.collections[0].cmap.name == "YlGn"
-
-        with pytest.raises(TypeError):
-            df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn")
-
-    @pytest.mark.slow
-    def test_pie_df(self):
-        df = DataFrame(
-            np.random.rand(5, 3),
-            columns=["X", "Y", "Z"],
-            index=["a", "b", "c", "d", "e"],
-        )
-        with pytest.raises(ValueError):
-            df.plot.pie()
-
-        ax = _check_plot_works(df.plot.pie, y="Y")
-        self._check_text_labels(ax.texts, df.index)
-
-        ax = _check_plot_works(df.plot.pie, y=2)
-        self._check_text_labels(ax.texts, df.index)
-
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(df.plot.pie, subplots=True)
-        assert len(axes) == len(df.columns)
-        for ax in axes:
-            self._check_text_labels(ax.texts, df.index)
-        for ax, ylabel in zip(axes, df.columns):
-            assert ax.get_ylabel() == ylabel
-
-        labels = ["A", "B", "C", "D", "E"]
-        color_args = ["r", "g", "b", "c", "m"]
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(
-                df.plot.pie, subplots=True, labels=labels, colors=color_args
-            )
-        assert len(axes) == len(df.columns)
-
-        for ax in axes:
-            self._check_text_labels(ax.texts, labels)
-            self._check_colors(ax.patches, facecolors=color_args)
-
-    def test_pie_df_nan(self):
-        df = DataFrame(np.random.rand(4, 4))
-        for i in range(4):
-            df.iloc[i, i] = np.nan
-        fig, axes = self.plt.subplots(ncols=4)
-        df.plot.pie(subplots=True, ax=axes, legend=True)
-
-        base_expected = ["0", "1", "2", "3"]
-        for i, ax in enumerate(axes):
-            expected = list(base_expected)  # force copy
-            expected[i] = ""
-            result = [x.get_text() for x in ax.texts]
-            assert result == expected
-            # legend labels
-            # NaN's not included in legend with subplots
-            # see https://github.com/pandas-dev/pandas/issues/8390
-            assert [x.get_text() for x in ax.get_legend().get_texts()] == base_expected[
-                :i
-            ] + base_expected[i + 1 :]
-
-    @pytest.mark.slow
-    def test_errorbar_plot(self):
-        with warnings.catch_warnings():
-            d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
-            df = DataFrame(d)
-            d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}
-            df_err = DataFrame(d_err)
-
-            # check line plots
-            ax = _check_plot_works(df.plot, yerr=df_err, logy=True)
-            self._check_has_errorbars(ax, xerr=0, yerr=2)
-            ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)
-            self._check_has_errorbars(ax, xerr=0, yerr=2)
-            ax = _check_plot_works(df.plot, yerr=df_err, loglog=True)
-            self._check_has_errorbars(ax, xerr=0, yerr=2)
-
-            kinds = ["line", "bar", "barh"]
-            for kind in kinds:
-                ax = _check_plot_works(df.plot, yerr=df_err["x"], kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=2)
-                ax = _check_plot_works(df.plot, yerr=d_err, kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=2)
-                ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind)
-                self._check_has_errorbars(ax, xerr=2, yerr=2)
-                ax = _check_plot_works(
-                    df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind
-                )
-                self._check_has_errorbars(ax, xerr=2, yerr=2)
-                ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind)
-                self._check_has_errorbars(ax, xerr=2, yerr=2)
-
-                # _check_plot_works adds an ax so catch warning. see GH #13188
-                axes = _check_plot_works(
-                    df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind
-                )
-                self._check_has_errorbars(axes, xerr=1, yerr=1)
-
-            ax = _check_plot_works(
-                (df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", log=True
-            )
-            self._check_has_errorbars(ax, xerr=2, yerr=2)
-
-            # yerr is raw error values
-            ax = _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4)
-            self._check_has_errorbars(ax, xerr=0, yerr=1)
-            ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4)
-            self._check_has_errorbars(ax, xerr=0, yerr=2)
-
-            # yerr is column name
-            for yerr in ["yerr", "誤差"]:
-                s_df = df.copy()
-                s_df[yerr] = np.ones(12) * 0.2
-                ax = _check_plot_works(s_df.plot, yerr=yerr)
-                self._check_has_errorbars(ax, xerr=0, yerr=2)
-                ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr)
-                self._check_has_errorbars(ax, xerr=0, yerr=1)
-
-            with pytest.raises(ValueError):
-                df.plot(yerr=np.random.randn(11))
-
-            df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12})
-            with pytest.raises((ValueError, TypeError)):
-                df.plot(yerr=df_err)
-
-    @pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError)
-    @pytest.mark.slow
-    def test_errorbar_plot_iterator(self):
-        with warnings.catch_warnings():
-            d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
-            df = DataFrame(d)
-
-            # yerr is iterator
-            ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df)))
-            self._check_has_errorbars(ax, xerr=0, yerr=2)
-
-    @pytest.mark.slow
-    def test_errorbar_with_integer_column_names(self):
-        # test with integer column names
-        df = DataFrame(np.random.randn(10, 2))
-        df_err = DataFrame(np.random.randn(10, 2))
-        ax = _check_plot_works(df.plot, yerr=df_err)
-        self._check_has_errorbars(ax, xerr=0, yerr=2)
-        ax = _check_plot_works(df.plot, y=0, yerr=1)
-        self._check_has_errorbars(ax, xerr=0, yerr=1)
-
-    @pytest.mark.slow
-    def test_errorbar_with_partial_columns(self):
-        df = DataFrame(np.random.randn(10, 3))
-        df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2])
-        kinds = ["line", "bar"]
-        for kind in kinds:
-            ax = _check_plot_works(df.plot, yerr=df_err, kind=kind)
-            self._check_has_errorbars(ax, xerr=0, yerr=2)
-
-        ix = date_range("1/1/2000", periods=10, freq="M")
-        df.set_index(ix, inplace=True)
-        df_err.set_index(ix, inplace=True)
-        ax = _check_plot_works(df.plot, yerr=df_err, kind="line")
-        self._check_has_errorbars(ax, xerr=0, yerr=2)
-
-        d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
-        df = DataFrame(d)
-        d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4}
-        df_err = DataFrame(d_err)
-        for err in [d_err, df_err]:
-            ax = _check_plot_works(df.plot, yerr=err)
-            self._check_has_errorbars(ax, xerr=0, yerr=1)
-
-    @pytest.mark.slow
-    def test_errorbar_timeseries(self):
-
-        with warnings.catch_warnings():
-            d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
-            d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}
-
-            # check time-series plots
-            ix = date_range("1/1/2000", "1/1/2001", freq="M")
-            tdf = DataFrame(d, index=ix)
-            tdf_err = DataFrame(d_err, index=ix)
-
-            kinds = ["line", "bar", "barh"]
-            for kind in kinds:
-                ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=2)
-                ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=2)
-                ax = _check_plot_works(tdf.plot, y="y", yerr=tdf_err["x"], kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=1)
-                ax = _check_plot_works(tdf.plot, y="y", yerr="x", kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=1)
-                ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
-                self._check_has_errorbars(ax, xerr=0, yerr=2)
-
-                # _check_plot_works adds an ax so catch warning. see GH #13188
-                axes = _check_plot_works(
-                    tdf.plot, kind=kind, yerr=tdf_err, subplots=True
-                )
-                self._check_has_errorbars(axes, xerr=0, yerr=1)
-
-    def test_errorbar_asymmetrical(self):
-
-        np.random.seed(0)
-        err = np.random.rand(3, 2, 5)
-
-        # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]...
-        df = DataFrame(np.arange(15).reshape(3, 5)).T
-
-        ax = df.plot(yerr=err, xerr=err / 2)
-
-        yerr_0_0 = ax.collections[1].get_paths()[0].vertices[:, 1]
-        expected_0_0 = err[0, :, 0] * np.array([-1, 1])
-        tm.assert_almost_equal(yerr_0_0, expected_0_0)
-
-        with pytest.raises(ValueError):
-            df.plot(yerr=err.T)
-
-        tm.close()
-
-    def test_table(self):
-        df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
-        _check_plot_works(df.plot, table=True)
-        _check_plot_works(df.plot, table=df)
-
-        ax = df.plot()
-        assert len(ax.tables) == 0
-        plotting.table(ax, df.T)
-        assert len(ax.tables) == 1
-
-    def test_errorbar_scatter(self):
-        df = DataFrame(np.random.randn(5, 2), index=range(5), columns=["x", "y"])
-        df_err = DataFrame(
-            np.random.randn(5, 2) / 5, index=range(5), columns=["x", "y"]
-        )
-
-        ax = _check_plot_works(df.plot.scatter, x="x", y="y")
-        self._check_has_errorbars(ax, xerr=0, yerr=0)
-        ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err)
-        self._check_has_errorbars(ax, xerr=1, yerr=0)
-
-        ax = _check_plot_works(df.plot.scatter, x="x", y="y", yerr=df_err)
-        self._check_has_errorbars(ax, xerr=0, yerr=1)
-        ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err, yerr=df_err)
-        self._check_has_errorbars(ax, xerr=1, yerr=1)
-
-        def _check_errorbar_color(containers, expected, has_err="has_xerr"):
-            lines = []
-            errs = [c.lines for c in ax.containers if getattr(c, has_err, False)][0]
-            for el in errs:
-                if is_list_like(el):
-                    lines.extend(el)
-                else:
-                    lines.append(el)
-            err_lines = [x for x in lines if x in ax.collections]
-            self._check_colors(
-                err_lines, linecolors=np.array([expected] * len(err_lines))
-            )
-
-        # GH 8081
-        df = DataFrame(np.random.randn(10, 5), columns=["a", "b", "c", "d", "e"])
-        ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red")
-        self._check_has_errorbars(ax, xerr=1, yerr=1)
-        _check_errorbar_color(ax.containers, "red", has_err="has_xerr")
-        _check_errorbar_color(ax.containers, "red", has_err="has_yerr")
-
-        ax = df.plot.scatter(x="a", y="b", yerr="e", color="green")
-        self._check_has_errorbars(ax, xerr=0, yerr=1)
-        _check_errorbar_color(ax.containers, "green", has_err="has_yerr")
-
-    @pytest.mark.slow
-    def test_sharex_and_ax(self):
-        # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
-        # the axis in fig.get_axis() are sorted differently than pandas
-        # expected them, so make sure that only the right ones are removed
-        import matplotlib.pyplot as plt
-
-        plt.close("all")
-        gs, axes = _generate_4_axes_via_gridspec()
-
-        df = DataFrame(
-            {
-                "a": [1, 2, 3, 4, 5, 6],
-                "b": [1, 2, 3, 4, 5, 6],
-                "c": [1, 2, 3, 4, 5, 6],
-                "d": [1, 2, 3, 4, 5, 6],
-            }
-        )
-
-        def _check(axes):
-            for ax in axes:
-                assert len(ax.lines) == 1
-                self._check_visible(ax.get_yticklabels(), visible=True)
-            for ax in [axes[0], axes[2]]:
-                self._check_visible(ax.get_xticklabels(), visible=False)
-                self._check_visible(ax.get_xticklabels(minor=True), visible=False)
-            for ax in [axes[1], axes[3]]:
-                self._check_visible(ax.get_xticklabels(), visible=True)
-                self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-
-        for ax in axes:
-            df.plot(x="a", y="b", title="title", ax=ax, sharex=True)
-        gs.tight_layout(plt.gcf())
-        _check(axes)
-        tm.close()
-
-        gs, axes = _generate_4_axes_via_gridspec()
-        with tm.assert_produces_warning(UserWarning):
-            axes = df.plot(subplots=True, ax=axes, sharex=True)
-        _check(axes)
-        tm.close()
-
-        gs, axes = _generate_4_axes_via_gridspec()
-        # without sharex, no labels should be touched!
-        for ax in axes:
-            df.plot(x="a", y="b", title="title", ax=ax)
-
-        gs.tight_layout(plt.gcf())
-        for ax in axes:
-            assert len(ax.lines) == 1
-            self._check_visible(ax.get_yticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-    @pytest.mark.slow
-    def test_sharey_and_ax(self):
-        # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
-        # the axis in fig.get_axis() are sorted differently than pandas
-        # expected them, so make sure that only the right ones are removed
-        import matplotlib.pyplot as plt
-
-        gs, axes = _generate_4_axes_via_gridspec()
-
-        df = DataFrame(
-            {
-                "a": [1, 2, 3, 4, 5, 6],
-                "b": [1, 2, 3, 4, 5, 6],
-                "c": [1, 2, 3, 4, 5, 6],
-                "d": [1, 2, 3, 4, 5, 6],
-            }
-        )
-
-        def _check(axes):
-            for ax in axes:
-                assert len(ax.lines) == 1
-                self._check_visible(ax.get_xticklabels(), visible=True)
-                self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-            for ax in [axes[0], axes[1]]:
-                self._check_visible(ax.get_yticklabels(), visible=True)
-            for ax in [axes[2], axes[3]]:
-                self._check_visible(ax.get_yticklabels(), visible=False)
-
-        for ax in axes:
-            df.plot(x="a", y="b", title="title", ax=ax, sharey=True)
-        gs.tight_layout(plt.gcf())
-        _check(axes)
-        tm.close()
-
-        gs, axes = _generate_4_axes_via_gridspec()
-        with tm.assert_produces_warning(UserWarning):
-            axes = df.plot(subplots=True, ax=axes, sharey=True)
-
-        gs.tight_layout(plt.gcf())
-        _check(axes)
-        tm.close()
-
-        gs, axes = _generate_4_axes_via_gridspec()
-        # without sharex, no labels should be touched!
-        for ax in axes:
-            df.plot(x="a", y="b", title="title", ax=ax)
-
-        gs.tight_layout(plt.gcf())
-        for ax in axes:
-            assert len(ax.lines) == 1
-            self._check_visible(ax.get_yticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-
-    @td.skip_if_no_scipy
-    def test_memory_leak(self):
-        """ Check that every plot type gets properly collected. """
-        import weakref
-        import gc
-
-        results = {}
-        for kind in plotting.PlotAccessor._all_kinds:
-
-            args = {}
-            if kind in ["hexbin", "scatter", "pie"]:
-                df = self.hexbin_df
-                args = {"x": "A", "y": "B"}
-            elif kind == "area":
-                df = self.tdf.abs()
-            else:
-                df = self.tdf
-
-            # Use a weakref so we can see if the object gets collected without
-            # also preventing it from being collected
-            results[kind] = weakref.proxy(df.plot(kind=kind, **args))
-
-        # have matplotlib delete all the figures
-        tm.close()
-        # force a garbage collection
-        gc.collect()
-        for key in results:
-            # check that every plot was collected
-            with pytest.raises(ReferenceError):
-                # need to actually access something to get an error
-                results[key].lines
-
-    @pytest.mark.slow
-    def test_df_subplots_patterns_minorticks(self):
-        # GH 10657
-        import matplotlib.pyplot as plt
-
-        df = DataFrame(
-            np.random.randn(10, 2),
-            index=date_range("1/1/2000", periods=10),
-            columns=list("AB"),
-        )
-
-        # shared subplots
-        fig, axes = plt.subplots(2, 1, sharex=True)
-        axes = df.plot(subplots=True, ax=axes)
-        for ax in axes:
-            assert len(ax.lines) == 1
-            self._check_visible(ax.get_yticklabels(), visible=True)
-        # xaxis of 1st ax must be hidden
-        self._check_visible(axes[0].get_xticklabels(), visible=False)
-        self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
-        self._check_visible(axes[1].get_xticklabels(), visible=True)
-        self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-        fig, axes = plt.subplots(2, 1)
-        with tm.assert_produces_warning(UserWarning):
-            axes = df.plot(subplots=True, ax=axes, sharex=True)
-        for ax in axes:
-            assert len(ax.lines) == 1
-            self._check_visible(ax.get_yticklabels(), visible=True)
-        # xaxis of 1st ax must be hidden
-        self._check_visible(axes[0].get_xticklabels(), visible=False)
-        self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
-        self._check_visible(axes[1].get_xticklabels(), visible=True)
-        self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-        # not shared
-        fig, axes = plt.subplots(2, 1)
-        axes = df.plot(subplots=True, ax=axes)
-        for ax in axes:
-            assert len(ax.lines) == 1
-            self._check_visible(ax.get_yticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-    @pytest.mark.slow
-    def test_df_gridspec_patterns(self):
-        # GH 10819
-        import matplotlib.pyplot as plt
-        import matplotlib.gridspec as gridspec
-
-        ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10))
-
-        df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB"))
-
-        def _get_vertical_grid():
-            gs = gridspec.GridSpec(3, 1)
-            fig = plt.figure()
-            ax1 = fig.add_subplot(gs[:2, :])
-            ax2 = fig.add_subplot(gs[2, :])
-            return ax1, ax2
-
-        def _get_horizontal_grid():
-            gs = gridspec.GridSpec(1, 3)
-            fig = plt.figure()
-            ax1 = fig.add_subplot(gs[:, :2])
-            ax2 = fig.add_subplot(gs[:, 2])
-            return ax1, ax2
-
-        for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]:
-            ax1 = ts.plot(ax=ax1)
-            assert len(ax1.lines) == 1
-            ax2 = df.plot(ax=ax2)
-            assert len(ax2.lines) == 2
-            for ax in [ax1, ax2]:
-                self._check_visible(ax.get_yticklabels(), visible=True)
-                self._check_visible(ax.get_xticklabels(), visible=True)
-                self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-            tm.close()
-
-        # subplots=True
-        for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]:
-            axes = df.plot(subplots=True, ax=[ax1, ax2])
-            assert len(ax1.lines) == 1
-            assert len(ax2.lines) == 1
-            for ax in axes:
-                self._check_visible(ax.get_yticklabels(), visible=True)
-                self._check_visible(ax.get_xticklabels(), visible=True)
-                self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-            tm.close()
-
-        # vertical / subplots / sharex=True / sharey=True
-        ax1, ax2 = _get_vertical_grid()
-        with tm.assert_produces_warning(UserWarning):
-            axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True)
-        assert len(axes[0].lines) == 1
-        assert len(axes[1].lines) == 1
-        for ax in [ax1, ax2]:
-            # yaxis are visible because there is only one column
-            self._check_visible(ax.get_yticklabels(), visible=True)
-        # xaxis of axes0 (top) are hidden
-        self._check_visible(axes[0].get_xticklabels(), visible=False)
-        self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
-        self._check_visible(axes[1].get_xticklabels(), visible=True)
-        self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-        # horizontal / subplots / sharex=True / sharey=True
-        ax1, ax2 = _get_horizontal_grid()
-        with tm.assert_produces_warning(UserWarning):
-            axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True)
-        assert len(axes[0].lines) == 1
-        assert len(axes[1].lines) == 1
-        self._check_visible(axes[0].get_yticklabels(), visible=True)
-        # yaxis of axes1 (right) are hidden
-        self._check_visible(axes[1].get_yticklabels(), visible=False)
-        for ax in [ax1, ax2]:
-            # xaxis are visible because there is only one column
-            self._check_visible(ax.get_xticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-        # boxed
-        def _get_boxed_grid():
-            gs = gridspec.GridSpec(3, 3)
-            fig = plt.figure()
-            ax1 = fig.add_subplot(gs[:2, :2])
-            ax2 = fig.add_subplot(gs[:2, 2])
-            ax3 = fig.add_subplot(gs[2, :2])
-            ax4 = fig.add_subplot(gs[2, 2])
-            return ax1, ax2, ax3, ax4
-
-        axes = _get_boxed_grid()
-        df = DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD"))
-        axes = df.plot(subplots=True, ax=axes)
-        for ax in axes:
-            assert len(ax.lines) == 1
-            # axis are visible because these are not shared
-            self._check_visible(ax.get_yticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-        # subplots / sharex=True / sharey=True
-        axes = _get_boxed_grid()
-        with tm.assert_produces_warning(UserWarning):
-            axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True)
-        for ax in axes:
-            assert len(ax.lines) == 1
-        for ax in [axes[0], axes[2]]:  # left column
-            self._check_visible(ax.get_yticklabels(), visible=True)
-        for ax in [axes[1], axes[3]]:  # right column
-            self._check_visible(ax.get_yticklabels(), visible=False)
-        for ax in [axes[0], axes[1]]:  # top row
-            self._check_visible(ax.get_xticklabels(), visible=False)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=False)
-        for ax in [axes[2], axes[3]]:  # bottom row
-            self._check_visible(ax.get_xticklabels(), visible=True)
-            self._check_visible(ax.get_xticklabels(minor=True), visible=True)
-        tm.close()
-
-    @pytest.mark.slow
-    def test_df_grid_settings(self):
-        # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792
-        self._check_grid_settings(
-            DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}),
-            plotting.PlotAccessor._dataframe_kinds,
-            kws={"x": "a", "y": "b"},
-        )
-
-    def test_invalid_colormap(self):
-        df = DataFrame(randn(3, 2), columns=["A", "B"])
-
-        with pytest.raises(ValueError):
-            df.plot(colormap="invalid_colormap")
-
-    def test_plain_axes(self):
-
-        # supplied ax itself is a SubplotAxes, but figure contains also
-        # a plain Axes object (GH11556)
-        fig, ax = self.plt.subplots()
-        fig.add_axes([0.2, 0.2, 0.2, 0.2])
-        Series(rand(10)).plot(ax=ax)
-
-        # supplied ax itself is a plain Axes, but because the cmap keyword
-        # a new ax is created for the colorbar -> also multiples axes (GH11520)
-        df = DataFrame({"a": randn(8), "b": randn(8)})
-        fig = self.plt.figure()
-        ax = fig.add_axes((0, 0, 1, 1))
-        df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv")
-
-        # other examples
-        fig, ax = self.plt.subplots()
-        from mpl_toolkits.axes_grid1 import make_axes_locatable
-
-        divider = make_axes_locatable(ax)
-        cax = divider.append_axes("right", size="5%", pad=0.05)
-        Series(rand(10)).plot(ax=ax)
-        Series(rand(10)).plot(ax=cax)
-
-        fig, ax = self.plt.subplots()
-        from mpl_toolkits.axes_grid1.inset_locator import inset_axes
-
-        iax = inset_axes(ax, width="30%", height=1.0, loc=3)
-        Series(rand(10)).plot(ax=ax)
-        Series(rand(10)).plot(ax=iax)
-
-    def test_passed_bar_colors(self):
-        import matplotlib as mpl
-
-        color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
-        colormap = mpl.colors.ListedColormap(color_tuples)
-        barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap)
-        assert color_tuples == [c.get_facecolor() for c in barplot.patches]
-
-    def test_rcParams_bar_colors(self):
-        import matplotlib as mpl
-
-        color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
-        with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}):
-            barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar")
-        assert color_tuples == [c.get_facecolor() for c in barplot.patches]
-
-    @pytest.mark.parametrize("method", ["line", "barh", "bar"])
-    def test_secondary_axis_font_size(self, method):
-        # GH: 12565
-        df = (
-            pd.DataFrame(np.random.randn(15, 2), columns=list("AB"))
-            .assign(C=lambda df: df.B.cumsum())
-            .assign(D=lambda df: df.C * 1.1)
-        )
-
-        fontsize = 20
-        sy = ["C", "D"]
-
-        kwargs = dict(secondary_y=sy, fontsize=fontsize, mark_right=True)
-        ax = getattr(df.plot, method)(**kwargs)
-        self._check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize)
-
-    @pytest.mark.slow
-    def test_x_string_values_ticks(self):
-        # Test if string plot index have a fixed xtick position
-        # GH: 7612, GH: 22334
-        df = pd.DataFrame(
-            {
-                "sales": [3, 2, 3],
-                "visits": [20, 42, 28],
-                "day": ["Monday", "Tuesday", "Wednesday"],
-            }
-        )
-        ax = df.plot.area(x="day")
-        ax.set_xlim(-1, 3)
-        xticklabels = [t.get_text() for t in ax.get_xticklabels()]
-        labels_position = dict(zip(xticklabels, ax.get_xticks()))
-        # Testing if the label stayed at the right position
-        assert labels_position["Monday"] == 0.0
-        assert labels_position["Tuesday"] == 1.0
-        assert labels_position["Wednesday"] == 2.0
-
-    @pytest.mark.slow
-    def test_x_multiindex_values_ticks(self):
-        # Test if multiindex plot index have a fixed xtick position
-        # GH: 15912
-        index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]])
-        df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index)
-        ax = df.plot()
-        ax.set_xlim(-1, 4)
-        xticklabels = [t.get_text() for t in ax.get_xticklabels()]
-        labels_position = dict(zip(xticklabels, ax.get_xticks()))
-        # Testing if the label stayed at the right position
-        assert labels_position["(2012, 1)"] == 0.0
-        assert labels_position["(2012, 2)"] == 1.0
-        assert labels_position["(2013, 1)"] == 2.0
-        assert labels_position["(2013, 2)"] == 3.0
-
-    @pytest.mark.parametrize("kind", ["line", "area"])
-    def test_xlim_plot_line(self, kind):
-        # test if xlim is set correctly in plot.line and plot.area
-        # GH 27686
-        df = pd.DataFrame([2, 4], index=[1, 2])
-        ax = df.plot(kind=kind)
-        xlims = ax.get_xlim()
-        assert xlims[0] < 1
-        assert xlims[1] > 2
-
-    def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
-        # test if xlim is set correctly when ax contains multiple different kinds
-        # of plots, GH 27686
-        fig, ax = self.plt.subplots()
-
-        indexes = ["k1", "k2", "k3", "k4"]
-        df = pd.DataFrame(
-            {
-                "s1": [1000, 2000, 1500, 2000],
-                "s2": [900, 1400, 2000, 3000],
-                "s3": [1500, 1500, 1600, 1200],
-                "secondary_y": [1, 3, 4, 3],
-            },
-            index=indexes,
-        )
-        df[["s1", "s2", "s3"]].plot.bar(ax=ax, stacked=False)
-        df[["secondary_y"]].plot(ax=ax, secondary_y=True)
-
-        xlims = ax.get_xlim()
-        assert xlims[0] < 0
-        assert xlims[1] > 3
-
-        # make sure axis labels are plotted correctly as well
-        xticklabels = [t.get_text() for t in ax.get_xticklabels()]
-        assert xticklabels == indexes
-
-    def test_subplots_sharex_false(self):
-        # test when sharex is set to False, two plots should have different
-        # labels, GH 25160
-        df = pd.DataFrame(np.random.rand(10, 2))
-        df.iloc[5:, 1] = np.nan
-        df.iloc[:5, 0] = np.nan
-
-        figs, axs = self.plt.subplots(2, 1)
-        df.plot.line(ax=axs, subplots=True, sharex=False)
-
-        expected_ax1 = np.arange(4.5, 10, 0.5)
-        expected_ax2 = np.arange(-0.5, 5, 0.5)
-
-        tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1)
-        tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
-
-    @pytest.mark.parametrize("by", ["C", ["C", "D"]])
-    @pytest.mark.parametrize("column", ["A", ["A", "B"], None])
-    def test_hist_plot_by_argument(self, by, column, test_hist_with_by_df):
-        # GH 15079
-        _check_plot_works(test_hist_with_by_df.plot.hist, column=column, by=by)
-
-    @pytest.mark.slow
-    @pytest.mark.parametrize(
-        "by, column, layout, axes_num",
-        [
-            (["C"], "A", (2, 2), 3),
-            ("C", "A", (2, 2), 3),
-            (["C"], ["A"], (1, 3), 3),
-            ("C", None, (3, 1), 3),
-            ("C", ["A", "B"], (3, 1), 3),
-            (["C", "D"], "A", (9, 1), 9),
-            (["C", "D"], "A", (3, 3), 9),
-            (["C", "D"], ["A"], (5, 2), 9),
-            (["C", "D"], ["A", "B"], (9, 1), 9),
-            (["C", "D"], None, (9, 1), 9),
-            (["C", "D"], ["A", "B"], (5, 2), 9),
-        ],
-    )
-    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, test_hist_with_by_df):
-        # GH 15079
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(
-                test_hist_with_by_df.plot.hist, column=column, by=by, layout=layout
-            )
-        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
-
-    def test_hist_plot_invalid_layout_with_by_raises(self, test_hist_with_by_df):
-        # GH 15079, test if error is raised when invalid layout is given
-
-        # layout too small for all 3 plots
-        msg = "larger than required size"
-        with pytest.raises(ValueError, match=msg):
-            test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
-
-        # invalid format for layout
-        msg = re.escape("Layout must be a tuple of (rows, columns)")
-        with pytest.raises(ValueError, match=msg):
-            test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
-
-        msg = "At least one dimension of layout must be positive"
-        with pytest.raises(ValueError, match=msg):
-            test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
-
-    @pytest.mark.slow
-    def test_axis_share_x_with_by(self, test_hist_with_by_df):
-        # GH 15079
-        ax1, ax2, ax3 = test_hist_with_by_df.plot.hist(column="A", by="C", sharex=True)
-
-        # share x
-        assert ax1._shared_x_axes.joined(ax1, ax2)
-        assert ax2._shared_x_axes.joined(ax1, ax2)
-        assert ax3._shared_x_axes.joined(ax1, ax3)
-        assert ax3._shared_x_axes.joined(ax2, ax3)
-
-        # don't share y
-        assert not ax1._shared_y_axes.joined(ax1, ax2)
-        assert not ax2._shared_y_axes.joined(ax1, ax2)
-        assert not ax3._shared_y_axes.joined(ax1, ax3)
-        assert not ax3._shared_y_axes.joined(ax2, ax3)
-
-    @pytest.mark.slow
-    def test_axis_share_y_with_by(self, test_hist_with_by_df):
-        # GH 15079
-        ax1, ax2, ax3 = test_hist_with_by_df.plot.hist(column="A", by="C", sharey=True)
-
-        # share y
-        assert ax1._shared_y_axes.joined(ax1, ax2)
-        assert ax2._shared_y_axes.joined(ax1, ax2)
-        assert ax3._shared_y_axes.joined(ax1, ax3)
-        assert ax3._shared_y_axes.joined(ax2, ax3)
-
-        # don't share x
-        assert not ax1._shared_x_axes.joined(ax1, ax2)
-        assert not ax2._shared_x_axes.joined(ax1, ax2)
-        assert not ax3._shared_x_axes.joined(ax1, ax3)
-        assert not ax3._shared_x_axes.joined(ax2, ax3)
-
-    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
-    def test_figure_shape_hist_with_by(self, figsize, test_hist_with_by_df):
-        # GH 15079
-        axes = test_hist_with_by_df.plot.hist(column="A", by="C", figsize=figsize)
-        self._check_axes_shape(axes, axes_num=3, figsize=figsize)
-
-    def test_plot_no_rows(self):
-        # GH 27758
-        df = pd.DataFrame(columns=["foo"], dtype=int)
-        assert df.empty
-        ax = df.plot()
-        assert len(ax.get_lines()) == 1
-        line = ax.get_lines()[0]
-        assert len(line.get_xdata()) == 0
-        assert len(line.get_ydata()) == 0
-
-    def test_plot_no_numeric_data(self):
-        df = pd.DataFrame(["a", "b", "c"])
-        with pytest.raises(TypeError):
-            df.plot()
-
-    def test_missing_markers_legend(self):
-        # 14958
-        df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"])
-        ax = df.plot(y=["A"], marker="x", linestyle="solid")
-        df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax)
-        df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax)
-
-        self._check_legend_labels(ax, labels=["A", "B", "C"])
-        self._check_legend_marker(ax, expected_markers=["x", "o", "<"])
-
-    def test_missing_markers_legend_using_style(self):
-        # 14563
-        df = pd.DataFrame(
-            {
-                "A": [1, 2, 3, 4, 5, 6],
-                "B": [2, 4, 1, 3, 2, 4],
-                "C": [3, 3, 2, 6, 4, 2],
-                "X": [1, 2, 3, 4, 5, 6],
-            }
-        )
-
-        fig, ax = self.plt.subplots()
-        for kind in "ABC":
-            df.plot("X", kind, label=kind, ax=ax, style=".")
-
-        self._check_legend_labels(ax, labels=["A", "B", "C"])
-        self._check_legend_marker(ax, expected_markers=[".", ".", "."])
-
-    def test_colors_of_columns_with_same_name(self):
-        # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136
-        # Creating a DataFrame with duplicate column labels and testing colors of them.
-        df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]})
-        df1 = pd.DataFrame({"a": [2, 4, 6]})
-        df_concat = pd.concat([df, df1], axis=1)
-        result = df_concat.plot()
-        for legend, line in zip(result.get_legend().legendHandles, result.lines):
-            assert legend.get_color() == line.get_color()
-
-    @pytest.mark.parametrize(
-        "index_name, old_label, new_label",
-        [
-            (None, "", "new"),
-            ("old", "old", "new"),
-            (None, "", ""),
-            (None, "", 1),
-            (None, "", [1, 2]),
-        ],
-    )
-    @pytest.mark.parametrize("kind", ["line", "area", "bar"])
-    def test_xlabel_ylabel_dataframe_single_plot(
-        self, kind, index_name, old_label, new_label
-    ):
-        # GH 9093
-        df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"])
-        df.index.name = index_name
-
-        # default is the ylabel is not shown and xlabel is index name
-        ax = df.plot(kind=kind)
-        assert ax.get_xlabel() == old_label
-        assert ax.get_ylabel() == ""
-
-        # old xlabel will be overriden and assigned ylabel will be used as ylabel
-        ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label)
-        assert ax.get_ylabel() == str(new_label)
-        assert ax.get_xlabel() == str(new_label)
-
-    @pytest.mark.parametrize(
-        "index_name, old_label, new_label",
-        [
-            (None, "", "new"),
-            ("old", "old", "new"),
-            (None, "", ""),
-            (None, "", 1),
-            (None, "", [1, 2]),
-        ],
-    )
-    @pytest.mark.parametrize("kind", ["line", "area", "bar"])
-    def test_xlabel_ylabel_dataframe_subplots(
-        self, kind, index_name, old_label, new_label
-    ):
-        # GH 9093
-        df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"])
-        df.index.name = index_name
-
-        # default is the ylabel is not shown and xlabel is index name
-        axes = df.plot(kind=kind, subplots=True)
-        assert all(ax.get_ylabel() == "" for ax in axes)
-        assert all(ax.get_xlabel() == old_label for ax in axes)
-
-        # old xlabel will be overriden and assigned ylabel will be used as ylabel
-        axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True)
-        assert all(ax.get_ylabel() == str(new_label) for ax in axes)
-        assert all(ax.get_xlabel() == str(new_label) for ax in axes)
-
-
-def _generate_4_axes_via_gridspec():
-    import matplotlib.pyplot as plt
-    import matplotlib as mpl
-    import matplotlib.gridspec  # noqa
-
-    gs = mpl.gridspec.GridSpec(2, 2)
-    ax_tl = plt.subplot(gs[0, 0])
-    ax_ll = plt.subplot(gs[1, 0])
-    ax_tr = plt.subplot(gs[0, 1])
-    ax_lr = plt.subplot(gs[1, 1])
-
-    return gs, [ax_tl, ax_ll, ax_tr, ax_lr]
diff --git a/pandas/tests/plotting/test_hist_by.py b/pandas/tests/plotting/test_hist_by.py
new file mode 100644
index 0000000000000..7371e23026cf3
--- /dev/null
+++ b/pandas/tests/plotting/test_hist_by.py
@@ -0,0 +1,112 @@
+import re
+
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.plotting.common import _check_axes_shape, _check_plot_works
+
+
+@pytest.fixture(scope="module")
+def test_hist_with_by_df():
+    np.random.seed(0)
+    df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+    df["C"] = np.random.choice(["a", "b", "c"], 30)
+    df["D"] = np.random.choice(["a", "b", "c"], 30)
+    return df
+
+
+@pytest.mark.parametrize("by", ["C", ["C", "D"]])
+@pytest.mark.parametrize("column", ["A", ["A", "B"], None])
+def test_hist_plot_by_argument(by, column, test_hist_with_by_df):
+    # GH 15079
+    _check_plot_works(test_hist_with_by_df.plot.hist, column=column, by=by)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "by, column, layout, axes_num",
+    [
+        (["C"], "A", (2, 2), 3),
+        ("C", "A", (2, 2), 3),
+        (["C"], ["A"], (1, 3), 3),
+        ("C", None, (3, 1), 3),
+        ("C", ["A", "B"], (3, 1), 3),
+        (["C", "D"], "A", (9, 1), 9),
+        (["C", "D"], "A", (3, 3), 9),
+        (["C", "D"], ["A"], (5, 2), 9),
+        (["C", "D"], ["A", "B"], (9, 1), 9),
+        (["C", "D"], None, (9, 1), 9),
+        (["C", "D"], ["A", "B"], (5, 2), 9),
+    ],
+)
+def test_hist_plot_layout_with_by(by, column, layout, axes_num, test_hist_with_by_df):
+    # GH 15079
+    # _check_plot_works adds an ax so catch warning. see GH #13188
+    with tm.assert_produces_warning(UserWarning):
+        axes = _check_plot_works(
+            test_hist_with_by_df.plot.hist, column=column, by=by, layout=layout
+        )
+    _check_axes_shape(axes, axes_num=axes_num, layout=layout)
+
+
+def test_hist_plot_invalid_layout_with_by_raises(test_hist_with_by_df):
+    # GH 15079, test if error is raised when invalid layout is given
+
+    # layout too small for all 3 plots
+    msg = "larger than required size"
+    with pytest.raises(ValueError, match=msg):
+        test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
+
+    # invalid format for layout
+    msg = re.escape("Layout must be a tuple of (rows, columns)")
+    with pytest.raises(ValueError, match=msg):
+        test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
+
+    msg = "At least one dimension of layout must be positive"
+    with pytest.raises(ValueError, match=msg):
+        test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
+
+
+@pytest.mark.slow
+def test_axis_share_x_with_by(test_hist_with_by_df):
+    # GH 15079
+    ax1, ax2, ax3 = test_hist_with_by_df.plot.hist(column="A", by="C", sharex=True)
+
+    # share x
+    assert ax1._shared_x_axes.joined(ax1, ax2)
+    assert ax2._shared_x_axes.joined(ax1, ax2)
+    assert ax3._shared_x_axes.joined(ax1, ax3)
+    assert ax3._shared_x_axes.joined(ax2, ax3)
+
+    # don't share y
+    assert not ax1._shared_y_axes.joined(ax1, ax2)
+    assert not ax2._shared_y_axes.joined(ax1, ax2)
+    assert not ax3._shared_y_axes.joined(ax1, ax3)
+    assert not ax3._shared_y_axes.joined(ax2, ax3)
+
+
+@pytest.mark.slow
+def test_axis_share_y_with_by(test_hist_with_by_df):
+    # GH 15079
+    ax1, ax2, ax3 = test_hist_with_by_df.plot.hist(column="A", by="C", sharey=True)
+
+    # share y
+    assert ax1._shared_y_axes.joined(ax1, ax2)
+    assert ax2._shared_y_axes.joined(ax1, ax2)
+    assert ax3._shared_y_axes.joined(ax1, ax3)
+    assert ax3._shared_y_axes.joined(ax2, ax3)
+
+    # don't share x
+    assert not ax1._shared_x_axes.joined(ax1, ax2)
+    assert not ax2._shared_x_axes.joined(ax1, ax2)
+    assert not ax3._shared_x_axes.joined(ax1, ax3)
+    assert not ax3._shared_x_axes.joined(ax2, ax3)
+
+
+@pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
+def test_figure_shape_hist_with_by(figsize, test_hist_with_by_df):
+    # GH 15079
+    axes = test_hist_with_by_df.plot.hist(column="A", by="C", figsize=figsize)
+    _check_axes_shape(axes, axes_num=3, figsize=figsize)
\ No newline at end of file

From f7bcdb7d49aa88e63dc87620918c9022a5e24b5d Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Fri, 21 May 2021 20:25:12 +0200
Subject: [PATCH 103/142] revert change

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index b540f163250d9..9f3ccb3e14116 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -273,6 +273,7 @@ change, as ``fsspec`` will still bring in the same packages as before.
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+
 - Compatibility with matplotlib 3.3.0 (:issue:`34850`)
 - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:`32538`)
 - :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`)

From aeb32e5b3bf9cb628bb152323c45b94100eec015 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Fri, 21 May 2021 20:57:57 +0200
Subject: [PATCH 104/142] rebase

---
 pandas/plotting/_core.py              |   2 +-
 pandas/plotting/_matplotlib/core.py   |   1 +
 pandas/plotting/_matplotlib/hist.py   |   1 +
 pandas/tests/plotting/test_hist_by.py | 190 +++++++++++++-------------
 4 files changed, 100 insertions(+), 94 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index db654a5ae66a3..63d64d8e027f2 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1278,7 +1278,7 @@ def hist(self, by=None, bins=10, **kwargs):
         by : str or sequence, optional
             Column in the DataFrame to group by.
 
-            .. versionadded:: 1.1.0
+            .. versionadded:: 1.3.0
 
         bins : int, default 10
             Number of histogram bins to be used.
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 2521fd5cb2aba..4fc98f740b5a7 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -59,6 +59,7 @@
     handle_shared_axes,
     table,
 )
+from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
 
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 295808c77094d..59e3aa4769287 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -30,6 +30,7 @@
     maybe_adjust_figure,
     set_ticks_props,
 )
+from pandas.plotting._matplotlib.groupby import create_iter_data_given_by, reformat_hist_y_given_by
 
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
diff --git a/pandas/tests/plotting/test_hist_by.py b/pandas/tests/plotting/test_hist_by.py
index 7371e23026cf3..84b7f9ee9b434 100644
--- a/pandas/tests/plotting/test_hist_by.py
+++ b/pandas/tests/plotting/test_hist_by.py
@@ -5,7 +5,7 @@
 
 from pandas import DataFrame
 import pandas._testing as tm
-from pandas.tests.plotting.common import _check_axes_shape, _check_plot_works
+from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
 
 
 @pytest.fixture(scope="module")
@@ -17,96 +17,100 @@ def test_hist_with_by_df():
     return df
 
 
-@pytest.mark.parametrize("by", ["C", ["C", "D"]])
-@pytest.mark.parametrize("column", ["A", ["A", "B"], None])
-def test_hist_plot_by_argument(by, column, test_hist_with_by_df):
+@td.skip_if_no_mpl
+class TestDataFrameColor(TestPlotBase):
+    def setup_method(self, method):
+        TestPlotBase.setup_method(self, method)
+        import matplotlib as mpl
+
+        mpl.rcdefaults()
+        self.hist_df = test_hist_with_by_df()
+
+    @pytest.mark.parametrize("by", ["C", ["C", "D"]])
+    @pytest.mark.parametrize("column", ["A", ["A", "B"], None])
+    def test_hist_plot_by_argument(self, by, column):
+        # GH 15079
+        _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
+
+    @pytest.mark.slow
+    @pytest.mark.parametrize(
+        "by, column, layout, axes_num",
+        [
+            (["C"], "A", (2, 2), 3),
+            ("C", "A", (2, 2), 3),
+            (["C"], ["A"], (1, 3), 3),
+            ("C", None, (3, 1), 3),
+            ("C", ["A", "B"], (3, 1), 3),
+            (["C", "D"], "A", (9, 1), 9),
+            (["C", "D"], "A", (3, 3), 9),
+            (["C", "D"], ["A"], (5, 2), 9),
+            (["C", "D"], ["A", "B"], (9, 1), 9),
+            (["C", "D"], None, (9, 1), 9),
+            (["C", "D"], ["A", "B"], (5, 2), 9),
+        ],
+    )
+    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
+        # GH 15079
+        # _check_plot_works adds an ax so catch warning. see GH #13188
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(
+                self.hist_df.plot.hist, column=column, by=by, layout=layout
+            )
+        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
+
+    def test_hist_plot_invalid_layout_with_by_raises(self):
+        # GH 15079, test if error is raised when invalid layout is given
+
+        # layout too small for all 3 plots
+        msg = "larger than required size"
+        with pytest.raises(ValueError, match=msg):
+            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
+
+        # invalid format for layout
+        msg = re.escape("Layout must be a tuple of (rows, columns)")
+        with pytest.raises(ValueError, match=msg):
+            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
+
+        msg = "At least one dimension of layout must be positive"
+        with pytest.raises(ValueError, match=msg):
+            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
+
+    @pytest.mark.slow
+    def test_axis_share_x_with_by(self):
+        # GH 15079
+        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharex=True)
+
+        # share x
+        assert ax1._shared_x_axes.joined(ax1, ax2)
+        assert ax2._shared_x_axes.joined(ax1, ax2)
+        assert ax3._shared_x_axes.joined(ax1, ax3)
+        assert ax3._shared_x_axes.joined(ax2, ax3)
+
+        # don't share y
+        assert not ax1._shared_y_axes.joined(ax1, ax2)
+        assert not ax2._shared_y_axes.joined(ax1, ax2)
+        assert not ax3._shared_y_axes.joined(ax1, ax3)
+        assert not ax3._shared_y_axes.joined(ax2, ax3)
+
+    @pytest.mark.slow
+    def test_axis_share_y_with_by(self):
+        # GH 15079
+        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharey=True)
+
+        # share y
+        assert ax1._shared_y_axes.joined(ax1, ax2)
+        assert ax2._shared_y_axes.joined(ax1, ax2)
+        assert ax3._shared_y_axes.joined(ax1, ax3)
+        assert ax3._shared_y_axes.joined(ax2, ax3)
+
+        # don't share x
+        assert not ax1._shared_x_axes.joined(ax1, ax2)
+        assert not ax2._shared_x_axes.joined(ax1, ax2)
+        assert not ax3._shared_x_axes.joined(ax1, ax3)
+        assert not ax3._shared_x_axes.joined(ax2, ax3)
+
+    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
+    def test_figure_shape_hist_with_by(self, figsize):
     # GH 15079
-    _check_plot_works(test_hist_with_by_df.plot.hist, column=column, by=by)
-
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "by, column, layout, axes_num",
-    [
-        (["C"], "A", (2, 2), 3),
-        ("C", "A", (2, 2), 3),
-        (["C"], ["A"], (1, 3), 3),
-        ("C", None, (3, 1), 3),
-        ("C", ["A", "B"], (3, 1), 3),
-        (["C", "D"], "A", (9, 1), 9),
-        (["C", "D"], "A", (3, 3), 9),
-        (["C", "D"], ["A"], (5, 2), 9),
-        (["C", "D"], ["A", "B"], (9, 1), 9),
-        (["C", "D"], None, (9, 1), 9),
-        (["C", "D"], ["A", "B"], (5, 2), 9),
-    ],
-)
-def test_hist_plot_layout_with_by(by, column, layout, axes_num, test_hist_with_by_df):
-    # GH 15079
-    # _check_plot_works adds an ax so catch warning. see GH #13188
-    with tm.assert_produces_warning(UserWarning):
-        axes = _check_plot_works(
-            test_hist_with_by_df.plot.hist, column=column, by=by, layout=layout
-        )
-    _check_axes_shape(axes, axes_num=axes_num, layout=layout)
-
-
-def test_hist_plot_invalid_layout_with_by_raises(test_hist_with_by_df):
-    # GH 15079, test if error is raised when invalid layout is given
-
-    # layout too small for all 3 plots
-    msg = "larger than required size"
-    with pytest.raises(ValueError, match=msg):
-        test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
-
-    # invalid format for layout
-    msg = re.escape("Layout must be a tuple of (rows, columns)")
-    with pytest.raises(ValueError, match=msg):
-        test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
-
-    msg = "At least one dimension of layout must be positive"
-    with pytest.raises(ValueError, match=msg):
-        test_hist_with_by_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
-
-
-@pytest.mark.slow
-def test_axis_share_x_with_by(test_hist_with_by_df):
-    # GH 15079
-    ax1, ax2, ax3 = test_hist_with_by_df.plot.hist(column="A", by="C", sharex=True)
-
-    # share x
-    assert ax1._shared_x_axes.joined(ax1, ax2)
-    assert ax2._shared_x_axes.joined(ax1, ax2)
-    assert ax3._shared_x_axes.joined(ax1, ax3)
-    assert ax3._shared_x_axes.joined(ax2, ax3)
-
-    # don't share y
-    assert not ax1._shared_y_axes.joined(ax1, ax2)
-    assert not ax2._shared_y_axes.joined(ax1, ax2)
-    assert not ax3._shared_y_axes.joined(ax1, ax3)
-    assert not ax3._shared_y_axes.joined(ax2, ax3)
-
-
-@pytest.mark.slow
-def test_axis_share_y_with_by(test_hist_with_by_df):
-    # GH 15079
-    ax1, ax2, ax3 = test_hist_with_by_df.plot.hist(column="A", by="C", sharey=True)
-
-    # share y
-    assert ax1._shared_y_axes.joined(ax1, ax2)
-    assert ax2._shared_y_axes.joined(ax1, ax2)
-    assert ax3._shared_y_axes.joined(ax1, ax3)
-    assert ax3._shared_y_axes.joined(ax2, ax3)
-
-    # don't share x
-    assert not ax1._shared_x_axes.joined(ax1, ax2)
-    assert not ax2._shared_x_axes.joined(ax1, ax2)
-    assert not ax3._shared_x_axes.joined(ax1, ax3)
-    assert not ax3._shared_x_axes.joined(ax2, ax3)
-
-
-@pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
-def test_figure_shape_hist_with_by(figsize, test_hist_with_by_df):
-    # GH 15079
-    axes = test_hist_with_by_df.plot.hist(column="A", by="C", figsize=figsize)
-    _check_axes_shape(axes, axes_num=3, figsize=figsize)
\ No newline at end of file
+        axes = self.hist_df.plot.hist(column="A", by="C", figsize=figsize)
+        self._check_axes_shape(axes, axes_num=3, figsize=figsize)
\ No newline at end of file

From dc1795983ff8b0217dd4caab210b65ab98df610b Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Fri, 21 May 2021 21:19:14 +0200
Subject: [PATCH 105/142] fixup

---
 pandas/plotting/_matplotlib/groupby.py | 25 +++++++++++++++++++------
 pandas/tests/plotting/test_hist_by.py  | 12 ++++++++----
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 061f95aacec90..4de3ae0996483 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,16 +1,29 @@
-from typing import Dict, List, Optional, Union
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Union,
+)
 
 import numpy as np
 
-from pandas._typing import FrameOrSeriesUnion, Label
+from pandas._typing import (
+    FrameOrSeriesUnion,
+    IndexLabel,
+)
 
 from pandas.core.dtypes.missing import isna
 
-from pandas import DataFrame, MultiIndex, Series, concat
+from pandas import (
+    DataFrame,
+    MultiIndex,
+    Series,
+    concat,
+)
 
 
 def create_iter_data_given_by(
-    data: DataFrame, by: Optional[List[Label]]
+    data: DataFrame, by: Optional[List[IndexLabel]]
 ) -> Union[DataFrame, Dict[str, FrameOrSeriesUnion]]:
     """
     Create data for iteration given `by` is assigned or not, and it is only
@@ -58,7 +71,7 @@ def create_iter_data_given_by(
 
 
 def reconstruct_data_with_by(
-    data: DataFrame, by: Union[Label, List[Label]], cols: List[Label]
+    data: DataFrame, by: Union[IndexLabel, List[IndexLabel]], cols: List[IndexLabel]
 ) -> DataFrame:
     """
     Internal function to group data, and reassign multiindex column names onto the
@@ -101,7 +114,7 @@ def reconstruct_data_with_by(
 
 
 def reformat_hist_y_given_by(
-    y: Union[Series, np.array], by: Optional[Union[Label, List[Label]]]
+    y: Union[Series, np.array], by: Optional[Union[IndexLabel, List[IndexLabel]]]
 ) -> Union[Series, np.array]:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 
diff --git a/pandas/tests/plotting/test_hist_by.py b/pandas/tests/plotting/test_hist_by.py
index 84b7f9ee9b434..1e126caf6ef50 100644
--- a/pandas/tests/plotting/test_hist_by.py
+++ b/pandas/tests/plotting/test_hist_by.py
@@ -3,12 +3,16 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import DataFrame
 import pandas._testing as tm
-from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
+from pandas.tests.plotting.common import (
+    TestPlotBase,
+    _check_plot_works,
+)
 
 
-@pytest.fixture(scope="module")
 def test_hist_with_by_df():
     np.random.seed(0)
     df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
@@ -111,6 +115,6 @@ def test_axis_share_y_with_by(self):
 
     @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
     def test_figure_shape_hist_with_by(self, figsize):
-    # GH 15079
+        # GH 15079
         axes = self.hist_df.plot.hist(column="A", by="C", figsize=figsize)
-        self._check_axes_shape(axes, axes_num=3, figsize=figsize)
\ No newline at end of file
+        self._check_axes_shape(axes, axes_num=3, figsize=figsize)

From 4aee3e0d91cd3b137bc142fc376ed94215aab5a1 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Fri, 21 May 2021 21:20:01 +0200
Subject: [PATCH 106/142] black

---
 pandas/plotting/_matplotlib/hist.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 59e3aa4769287..603379c93e995 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -24,13 +24,16 @@
     LinePlot,
     MPLPlot,
 )
+from pandas.plotting._matplotlib.groupby import (
+    create_iter_data_given_by,
+    reformat_hist_y_given_by,
+)
 from pandas.plotting._matplotlib.tools import (
     create_subplots,
     flatten_axes,
     maybe_adjust_figure,
     set_ticks_props,
 )
-from pandas.plotting._matplotlib.groupby import create_iter_data_given_by, reformat_hist_y_given_by
 
 if TYPE_CHECKING:
     from matplotlib.axes import Axes

From 4eb466fbf3ff8c15f192964921cfd78a843a27fd Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Fri, 21 May 2021 21:25:14 +0200
Subject: [PATCH 107/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 4fc98f740b5a7..03fb77c2d9166 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -60,6 +60,7 @@
     table,
 )
 from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
+from pandas._typing import IndexLabel
 
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
@@ -125,7 +126,7 @@ def __init__(
         table=False,
         layout=None,
         include_bool=False,
-        column: Optional[Label] = None,
+        column: IndexLabel | None = None,
         **kwds,
     ):
 

From 51602240ece1edc8747155f66aabf60b11873026 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 09:58:55 +0200
Subject: [PATCH 108/142] fix mypy

---
 pandas/plotting/_matplotlib/core.py    |  4 ++--
 pandas/plotting/_matplotlib/groupby.py | 10 ++++------
 pandas/plotting/_matplotlib/hist.py    |  2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 03fb77c2d9166..bfc6c5e228453 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -9,6 +9,7 @@
 from matplotlib.artist import Artist
 import numpy as np
 
+from pandas._typing import IndexLabel
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
 
@@ -42,6 +43,7 @@
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
+from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
 from pandas.plotting._matplotlib.style import get_standard_colors
 from pandas.plotting._matplotlib.timeseries import (
     decorate_axes,
@@ -59,8 +61,6 @@
     handle_shared_axes,
     table,
 )
-from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
-from pandas._typing import IndexLabel
 
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 4de3ae0996483..5e36391c1286c 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,7 +1,5 @@
 from typing import (
     Dict,
-    List,
-    Optional,
     Union,
 )
 
@@ -23,7 +21,7 @@
 
 
 def create_iter_data_given_by(
-    data: DataFrame, by: Optional[List[IndexLabel]]
+    data: DataFrame, by: IndexLabel | None = None
 ) -> Union[DataFrame, Dict[str, FrameOrSeriesUnion]]:
     """
     Create data for iteration given `by` is assigned or not, and it is only
@@ -71,7 +69,7 @@ def create_iter_data_given_by(
 
 
 def reconstruct_data_with_by(
-    data: DataFrame, by: Union[IndexLabel, List[IndexLabel]], cols: List[IndexLabel]
+    data: DataFrame, by: IndexLabel, cols: IndexLabel
 ) -> DataFrame:
     """
     Internal function to group data, and reassign multiindex column names onto the
@@ -114,8 +112,8 @@ def reconstruct_data_with_by(
 
 
 def reformat_hist_y_given_by(
-    y: Union[Series, np.array], by: Optional[Union[IndexLabel, List[IndexLabel]]]
-) -> Union[Series, np.array]:
+    y: Union[Series, np.ndarray], by: IndexLabel | None = None
+) -> Union[Series, np.ndarray]:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 
     If by is None, input y is 1-d with NaN removed; and if by is not None, groupby
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 603379c93e995..00b27705d558e 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -63,7 +63,7 @@ def _args_adjust(self):
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)
 
-    def _calculate_bins(self, data: DataFrame) -> np.array:
+    def _calculate_bins(self, data: DataFrame) -> np.ndarray:
         """Calculate bins given data"""
         values = data._convert(datetime=True)._get_numeric_data()
         values = np.ravel(values)

From e2de0d395bd40fc603886614197bfc94b67600d4 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 10:28:52 +0200
Subject: [PATCH 109/142] fix mypy

---
 pandas/plotting/_matplotlib/core.py    | 3 ++-
 pandas/plotting/_matplotlib/groupby.py | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index bfc6c5e228453..a76a6c1871199 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -3,6 +3,7 @@
 from typing import (
     TYPE_CHECKING,
     Hashable,
+    Optional
 )
 import warnings
 
@@ -126,7 +127,7 @@ def __init__(
         table=False,
         layout=None,
         include_bool=False,
-        column: IndexLabel | None = None,
+        column: Optional[IndexLabel] = None,
         **kwds,
     ):
 
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 5e36391c1286c..0f15a1b34f01e 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,5 +1,6 @@
 from typing import (
     Dict,
+    Optional,
     Union,
 )
 
@@ -21,7 +22,7 @@
 
 
 def create_iter_data_given_by(
-    data: DataFrame, by: IndexLabel | None = None
+    data: DataFrame, by: Optional[IndexLabel] = None
 ) -> Union[DataFrame, Dict[str, FrameOrSeriesUnion]]:
     """
     Create data for iteration given `by` is assigned or not, and it is only
@@ -112,7 +113,7 @@ def reconstruct_data_with_by(
 
 
 def reformat_hist_y_given_by(
-    y: Union[Series, np.ndarray], by: IndexLabel | None = None
+    y: Union[Series, np.ndarray], by: Optional[IndexLabel] = None
 ) -> Union[Series, np.ndarray]:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 

From b2b33ac65c914ff5be4979f0302ccf9cdc9e7b40 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 10:39:04 +0200
Subject: [PATCH 110/142] fix mypy

---
 pandas/plotting/_matplotlib/core.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index a76a6c1871199..bfc6c5e228453 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -3,7 +3,6 @@
 from typing import (
     TYPE_CHECKING,
     Hashable,
-    Optional
 )
 import warnings
 
@@ -127,7 +126,7 @@ def __init__(
         table=False,
         layout=None,
         include_bool=False,
-        column: Optional[IndexLabel] = None,
+        column: IndexLabel | None = None,
         **kwds,
     ):
 

From 1199a93639b63839ac48ec85b4c9c851e089c728 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 13:04:14 +0200
Subject: [PATCH 111/142] fix mypy

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index bfc6c5e228453..e23a5d9ea8386 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -143,7 +143,7 @@ def __init__(
         if self.by and column is None:
             self.columns = [col for col in data.columns if col not in self.by]
         else:
-            self.columns = com.convert_to_list_like(column)
+            self.columns = com.maybe_make_list(column)
 
         self.kind = kind
 

From c4a584261f508b6fc4efb35420be3798276d06e7 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 13:05:14 +0200
Subject: [PATCH 112/142] fix mypy

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index e23a5d9ea8386..96288bd33be7c 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -101,7 +101,7 @@ def __init__(
         self,
         data,
         kind=None,
-        by=None,
+        by: IndexLabel | None = None,
         subplots=False,
         sharex=None,
         sharey=False,

From 65564147cc92ff3e366c05eb158d5669f6ec942b Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 13:32:17 +0200
Subject: [PATCH 113/142] fix mypy

---
 pandas/plotting/_matplotlib/groupby.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 0f15a1b34f01e..e8ed5ed2a8b12 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -11,7 +11,10 @@
     IndexLabel,
 )
 
-from pandas.core.dtypes.missing import isna
+from pandas.core.dtypes.missing import (
+    isna,
+    remove_na_arraylike,
+)
 
 from pandas import (
     DataFrame,
@@ -121,8 +124,7 @@ def reformat_hist_y_given_by(
     will take place and input y is multi-dimensional array.
     """
     if by is not None and len(y.shape) > 1:
-        notna = [col[~isna(col)] for col in y.T]
-        y = np.array(np.array(notna).T)
+        y = np.array([remove_na_arraylike(col) for col in y.T]).T
     else:
-        y = y[~isna(y)]
+        y = remove_na_arraylike(y)
     return y

From 826f277c164a3f4c6a0307b31cc316c9a6593d66 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 13:56:22 +0200
Subject: [PATCH 114/142] fix flake8

---
 pandas/plotting/_matplotlib/groupby.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index e8ed5ed2a8b12..4145b3359bafa 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -11,10 +11,7 @@
     IndexLabel,
 )
 
-from pandas.core.dtypes.missing import (
-    isna,
-    remove_na_arraylike,
-)
+from pandas.core.dtypes.missing import remove_na_arraylike
 
 from pandas import (
     DataFrame,

From 891dc55ba5a6d6de93bf44e8a997873cfdb2b91a Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 14:53:44 +0200
Subject: [PATCH 115/142] add by support for boxplot

---
 pandas/plotting/_matplotlib/boxplot.py | 15 ++++++++++++++-
 pandas/plotting/_matplotlib/core.py    | 18 ++++++++++++++----
 pandas/plotting/_matplotlib/groupby.py | 17 +++++++++++++----
 3 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index 21f30c1311e17..b3526845604cb 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -18,6 +18,7 @@
     LinePlot,
     MPLPlot,
 )
+from pandas.plotting._matplotlib.groupby import create_iter_data_given_by
 from pandas.plotting._matplotlib.style import get_standard_colors
 from pandas.plotting._matplotlib.tools import (
     create_subplots,
@@ -135,10 +136,17 @@ def _make_plot(self):
         if self.subplots:
             self._return_obj = pd.Series(dtype=object)
 
-            for i, (label, y) in enumerate(self._iter_data()):
+            data = create_iter_data_given_by(self.data, self.by, self._kind)
+            for i, (label, y) in enumerate(self._iter_data(data=data)):
                 ax = self._get_ax(i)
                 kwds = self.kwds.copy()
 
+                # When by is applied, show title for subplots to know which group it is
+                # just like df.boxplot, and need to apply T on y to provide right input
+                if self.by is not None:
+                    y = y.T
+                    ax.set_title(pprint_thing(label))
+
                 ret, bp = self._plot(
                     ax, y, column_num=i, return_type=self.return_type, **kwds
                 )
@@ -146,6 +154,11 @@ def _make_plot(self):
                 self._return_obj[label] = ret
 
                 label = [pprint_thing(label)]
+
+                # When `by` is assigned, the ticklabels will become unique grouped
+                # values, instead of label which is used as subtitle in this case.
+                if self.by is not None:
+                    label = self.data.columns.levels[0]
                 self._set_ticklabels(ax, label)
         else:
             y = self.data.values.T
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 96288bd33be7c..c71d084844a8e 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -135,16 +135,26 @@ def __init__(
         self.data = data
         self.by = com.maybe_make_list(by)
 
-        if self.by:
-            self._grouped_data_size = len(data.groupby(self.by))
-
         # Assign the rest of columns into self.columns if by is explicitly defined
         # while column is not, so as to keep the same behaviour with current df.hist
+        # or df.boxplot.
         if self.by and column is None:
-            self.columns = [col for col in data.columns if col not in self.by]
+            self.columns = [
+                col
+                for col in data.columns
+                if col not in self.by and is_numeric_dtype(data[col])
+            ]
         else:
             self.columns = com.maybe_make_list(column)
 
+        # When `by` is explicitly assigned, grouped data size will be defined, and
+        # this will determine number of subplots to have, aka the size of `self.axes`
+        if self.by:
+            if self._kind == "hist":
+                self._grouped_data_size = len(data.groupby(self.by))
+            elif self._kind == "box":
+                self._grouped_data_size = len(self.columns)
+
         self.kind = kind
 
         self.sort_columns = sort_columns
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 4145b3359bafa..59ae63964a4d7 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -22,7 +22,7 @@
 
 
 def create_iter_data_given_by(
-    data: DataFrame, by: Optional[IndexLabel] = None
+    data: DataFrame, by: Optional[IndexLabel] = None, kind: str = "hist"
 ) -> Union[DataFrame, Dict[str, FrameOrSeriesUnion]]:
     """
     Create data for iteration given `by` is assigned or not, and it is only
@@ -35,8 +35,9 @@ def create_iter_data_given_by(
 
     Parameters
     ----------
-    data: reformatted grouped data from `_compute_plot_data` method
+    data: reformatted grouped data from `_compute_plot_data` method.
     by: list or None, value assigned to `by`.
+    kind: str, plot kind. This function is only used for `hist` and `box` plots.
 
     Returns
     -------
@@ -56,15 +57,23 @@ def create_iter_data_given_by(
     {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
      'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
     """
+    if kind == "hist":
+        level = 0
+    elif kind == "box":
+        level = 1
+    else:
+        raise ValueError("This function is only used for hist and box plot")
+
     iter_data: Union[DataFrame, Dict[str, FrameOrSeriesUnion]]
     if not by:
         iter_data = data
     else:
         # Select sub-columns based on the value of first level of MI
         assert isinstance(data.columns, MultiIndex)
-        cols = data.columns.levels[0]
+        cols = data.columns.levels[level]
         iter_data = {
-            col: data.loc[:, data.columns.get_level_values(0) == col] for col in cols
+            col: data.loc[:, data.columns.get_level_values(level) == col]
+            for col in cols
         }
     return iter_data
 

From 4c4a15899f2583bdd223b7030a2de0ed5f64a086 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 14:55:41 +0200
Subject: [PATCH 116/142] doc

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index c71d084844a8e..dffe64da186ef 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -148,7 +148,7 @@ def __init__(
             self.columns = com.maybe_make_list(column)
 
         # When `by` is explicitly assigned, grouped data size will be defined, and
-        # this will determine number of subplots to have, aka the size of `self.axes`
+        # this will determine number of subplots to have, aka `self.nseries`
         if self.by:
             if self._kind == "hist":
                 self._grouped_data_size = len(data.groupby(self.by))

From ea7e5b15594e6f865cf59d33e542c22366643ad8 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 20:53:32 +0200
Subject: [PATCH 117/142] Add tests

---
 doc/source/whatsnew/v1.3.0.rst            |   1 +
 pandas/tests/plotting/test_hist_box_by.py | 287 ++++++++++++++++++++++
 pandas/tests/plotting/test_hist_by.py     | 120 ---------
 3 files changed, 288 insertions(+), 120 deletions(-)
 create mode 100644 pandas/tests/plotting/test_hist_box_by.py
 delete mode 100644 pandas/tests/plotting/test_hist_by.py

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c26f8288f59ab..e29a5928e6abc 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -230,6 +230,7 @@ Other enhancements
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
+- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/tests/plotting/test_hist_box_by.py b/pandas/tests/plotting/test_hist_box_by.py
new file mode 100644
index 0000000000000..43cb02270f338
--- /dev/null
+++ b/pandas/tests/plotting/test_hist_box_by.py
@@ -0,0 +1,287 @@
+import re
+
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.plotting.common import (
+    TestPlotBase,
+    _check_plot_works,
+)
+
+
+def test_hist_box_with_by_df():
+    np.random.seed(0)
+    df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
+    df["C"] = np.random.choice(["a", "b", "c"], 30)
+    df["D"] = np.random.choice(["a", "b", "c"], 30)
+    return df
+
+
+@td.skip_if_no_mpl
+class TestHistWithBy(TestPlotBase):
+    def setup_method(self, method):
+        TestPlotBase.setup_method(self, method)
+        import matplotlib as mpl
+
+        mpl.rcdefaults()
+        self.hist_df = test_hist_box_with_by_df()
+
+    @pytest.mark.parametrize(
+        "by, column, titles, legends",
+        [
+            ("C", "A", ["a", "b", "c"], [["A"]] * 3),
+            ("C", ["A", "B"], ["a", "b", "c"], [["A", "B"]] * 3),
+            ("C", None, ["a", "b", "c"], [["A", "B"]] * 3),
+            (
+                ["C", "D"],
+                "A",
+                [
+                    "(a, a)",
+                    "(a, b)",
+                    "(a, c)",
+                    "(b, a)",
+                    "(b, b)",
+                    "(b, c)",
+                    "(c, a)",
+                    "(c, b)",
+                    "(c, c)",
+                ],
+                [["A"]] * 9,
+            ),
+            (
+                ["C", "D"],
+                ["A", "B"],
+                [
+                    "(a, a)",
+                    "(a, b)",
+                    "(a, c)",
+                    "(b, a)",
+                    "(b, b)",
+                    "(b, c)",
+                    "(c, a)",
+                    "(c, b)",
+                    "(c, c)",
+                ],
+                [["A", "B"]] * 9,
+            ),
+            (
+                ["C", "D"],
+                None,
+                [
+                    "(a, a)",
+                    "(a, b)",
+                    "(a, c)",
+                    "(b, a)",
+                    "(b, b)",
+                    "(b, c)",
+                    "(c, a)",
+                    "(c, b)",
+                    "(c, c)",
+                ],
+                [["A", "B"]] * 9,
+            ),
+        ],
+    )
+    def test_hist_plot_by_argument(self, by, column, titles, legends):
+        # GH 15079
+        axes = _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
+        result_titles = [ax.get_title() for ax in axes]
+        result_legends = [[l.get_text() for l in ax.get_legend().texts] for ax in axes]
+
+        assert result_legends == legends
+        assert result_titles == titles
+
+    @pytest.mark.slow
+    @pytest.mark.parametrize(
+        "by, column, layout, axes_num",
+        [
+            (["C"], "A", (2, 2), 3),
+            ("C", "A", (2, 2), 3),
+            (["C"], ["A"], (1, 3), 3),
+            ("C", None, (3, 1), 3),
+            ("C", ["A", "B"], (3, 1), 3),
+            (["C", "D"], "A", (9, 1), 9),
+            (["C", "D"], "A", (3, 3), 9),
+            (["C", "D"], ["A"], (5, 2), 9),
+            (["C", "D"], ["A", "B"], (9, 1), 9),
+            (["C", "D"], None, (9, 1), 9),
+            (["C", "D"], ["A", "B"], (5, 2), 9),
+        ],
+    )
+    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
+        # GH 15079
+        # _check_plot_works adds an ax so catch warning. see GH #13188
+        with tm.assert_produces_warning(UserWarning):
+            axes = _check_plot_works(
+                self.hist_df.plot.hist, column=column, by=by, layout=layout
+            )
+        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
+
+    def test_hist_plot_invalid_layout_with_by_raises(self):
+        # GH 15079, test if error is raised when invalid layout is given
+
+        # layout too small for all 3 plots
+        msg = "larger than required size"
+        with pytest.raises(ValueError, match=msg):
+            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
+
+        # invalid format for layout
+        msg = re.escape("Layout must be a tuple of (rows, columns)")
+        with pytest.raises(ValueError, match=msg):
+            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
+
+        msg = "At least one dimension of layout must be positive"
+        with pytest.raises(ValueError, match=msg):
+            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
+
+    @pytest.mark.slow
+    def test_axis_share_x_with_by(self):
+        # GH 15079
+        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharex=True)
+
+        # share x
+        assert ax1._shared_x_axes.joined(ax1, ax2)
+        assert ax2._shared_x_axes.joined(ax1, ax2)
+        assert ax3._shared_x_axes.joined(ax1, ax3)
+        assert ax3._shared_x_axes.joined(ax2, ax3)
+
+        # don't share y
+        assert not ax1._shared_y_axes.joined(ax1, ax2)
+        assert not ax2._shared_y_axes.joined(ax1, ax2)
+        assert not ax3._shared_y_axes.joined(ax1, ax3)
+        assert not ax3._shared_y_axes.joined(ax2, ax3)
+
+    @pytest.mark.slow
+    def test_axis_share_y_with_by(self):
+        # GH 15079
+        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharey=True)
+
+        # share y
+        assert ax1._shared_y_axes.joined(ax1, ax2)
+        assert ax2._shared_y_axes.joined(ax1, ax2)
+        assert ax3._shared_y_axes.joined(ax1, ax3)
+        assert ax3._shared_y_axes.joined(ax2, ax3)
+
+        # don't share x
+        assert not ax1._shared_x_axes.joined(ax1, ax2)
+        assert not ax2._shared_x_axes.joined(ax1, ax2)
+        assert not ax3._shared_x_axes.joined(ax1, ax3)
+        assert not ax3._shared_x_axes.joined(ax2, ax3)
+
+    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
+    def test_figure_shape_hist_with_by(self, figsize):
+        # GH 15079
+        axes = self.hist_df.plot.hist(column="A", by="C", figsize=figsize)
+        self._check_axes_shape(axes, axes_num=3, figsize=figsize)
+
+
+@td.skip_if_no_mpl
+class TestBoxWithBy(TestPlotBase):
+    def setup_method(self, method):
+        TestPlotBase.setup_method(self, method)
+        import matplotlib as mpl
+
+        mpl.rcdefaults()
+        self.box_df = test_hist_box_with_by_df()
+
+    @pytest.mark.parametrize(
+        "by, column, titles, xticklabels",
+        [
+            ("C", "A", ["A"], [["a", "b", "c"]]),
+            (
+                ["C", "D"],
+                "A",
+                ["A"],
+                [
+                    [
+                        "('a', 'a')",
+                        "('a', 'b')",
+                        "('a', 'c')",
+                        "('b', 'a')",
+                        "('b', 'b')",
+                        "('b', 'c')",
+                        "('c', 'a')",
+                        "('c', 'b')",
+                        "('c', 'c')",
+                    ]
+                ],
+            ),
+            ("C", ["A", "B"], ["A", "B"], [["a", "b", "c"]] * 2),
+            (
+                ["C", "D"],
+                ["A", "B"],
+                ["A", "B"],
+                [
+                    [
+                        "('a', 'a')",
+                        "('a', 'b')",
+                        "('a', 'c')",
+                        "('b', 'a')",
+                        "('b', 'b')",
+                        "('b', 'c')",
+                        "('c', 'a')",
+                        "('c', 'b')",
+                        "('c', 'c')",
+                    ]
+                ]
+                * 2,
+            ),
+            (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2),
+        ],
+    )
+    def test_box_plot_by_argument(self, by, column, titles, xticklabels):
+        # GH 15079
+        axes = _check_plot_works(self.box_df.plot.box, column=column, by=by)
+        result_titles = [ax.get_title() for ax in axes]
+        result_xticklabels = [
+            [i.get_text() for i in ax.get_xticklabels()] for ax in axes
+        ]
+
+        assert result_xticklabels == xticklabels
+        assert result_titles == titles
+
+    @pytest.mark.slow
+    @pytest.mark.parametrize(
+        "by, column, layout, axes_num",
+        [
+            (["C"], "A", (1, 1), 1),
+            ("C", "A", (1, 1), 1),
+            ("C", None, (2, 1), 2),
+            ("C", ["A", "B"], (1, 2), 2),
+            (["C", "D"], "A", (1, 1), 1),
+            (["C", "D"], None, (1, 2), 2),
+        ],
+    )
+    def test_box_plot_layout_with_by(self, by, column, layout, axes_num):
+        # GH 15079
+        axes = _check_plot_works(
+            self.box_df.plot.box, column=column, by=by, layout=layout
+        )
+        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
+
+    def test_box_plot_invalid_layout_with_by_raises(self):
+        # GH 15079, test if error is raised when invalid layout is given
+
+        # layout too small for all 3 plots
+        msg = "larger than required size"
+        with pytest.raises(ValueError, match=msg):
+            self.box_df.plot.box(column=["A", "B"], by=["C", "D"], layout=(1, 1))
+
+        # invalid format for layout
+        msg = re.escape("Layout must be a tuple of (rows, columns)")
+        with pytest.raises(ValueError, match=msg):
+            self.box_df.plot.box(column=["A", "B"], by="C", layout=(1,))
+
+        msg = "At least one dimension of layout must be positive"
+        with pytest.raises(ValueError, match=msg):
+            self.box_df.plot.box(column=["A", "B"], by="C", layout=(-1, -1))
+
+    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
+    def test_figure_shape_hist_with_by(self, figsize):
+        # GH 15079
+        axes = self.box_df.plot.box(column="A", by="C", figsize=figsize)
+        self._check_axes_shape(axes, axes_num=1, figsize=figsize)
diff --git a/pandas/tests/plotting/test_hist_by.py b/pandas/tests/plotting/test_hist_by.py
deleted file mode 100644
index 1e126caf6ef50..0000000000000
--- a/pandas/tests/plotting/test_hist_by.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import re
-
-import numpy as np
-import pytest
-
-import pandas.util._test_decorators as td
-
-from pandas import DataFrame
-import pandas._testing as tm
-from pandas.tests.plotting.common import (
-    TestPlotBase,
-    _check_plot_works,
-)
-
-
-def test_hist_with_by_df():
-    np.random.seed(0)
-    df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
-    df["C"] = np.random.choice(["a", "b", "c"], 30)
-    df["D"] = np.random.choice(["a", "b", "c"], 30)
-    return df
-
-
-@td.skip_if_no_mpl
-class TestDataFrameColor(TestPlotBase):
-    def setup_method(self, method):
-        TestPlotBase.setup_method(self, method)
-        import matplotlib as mpl
-
-        mpl.rcdefaults()
-        self.hist_df = test_hist_with_by_df()
-
-    @pytest.mark.parametrize("by", ["C", ["C", "D"]])
-    @pytest.mark.parametrize("column", ["A", ["A", "B"], None])
-    def test_hist_plot_by_argument(self, by, column):
-        # GH 15079
-        _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
-
-    @pytest.mark.slow
-    @pytest.mark.parametrize(
-        "by, column, layout, axes_num",
-        [
-            (["C"], "A", (2, 2), 3),
-            ("C", "A", (2, 2), 3),
-            (["C"], ["A"], (1, 3), 3),
-            ("C", None, (3, 1), 3),
-            ("C", ["A", "B"], (3, 1), 3),
-            (["C", "D"], "A", (9, 1), 9),
-            (["C", "D"], "A", (3, 3), 9),
-            (["C", "D"], ["A"], (5, 2), 9),
-            (["C", "D"], ["A", "B"], (9, 1), 9),
-            (["C", "D"], None, (9, 1), 9),
-            (["C", "D"], ["A", "B"], (5, 2), 9),
-        ],
-    )
-    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
-        # GH 15079
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning):
-            axes = _check_plot_works(
-                self.hist_df.plot.hist, column=column, by=by, layout=layout
-            )
-        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
-
-    def test_hist_plot_invalid_layout_with_by_raises(self):
-        # GH 15079, test if error is raised when invalid layout is given
-
-        # layout too small for all 3 plots
-        msg = "larger than required size"
-        with pytest.raises(ValueError, match=msg):
-            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
-
-        # invalid format for layout
-        msg = re.escape("Layout must be a tuple of (rows, columns)")
-        with pytest.raises(ValueError, match=msg):
-            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
-
-        msg = "At least one dimension of layout must be positive"
-        with pytest.raises(ValueError, match=msg):
-            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
-
-    @pytest.mark.slow
-    def test_axis_share_x_with_by(self):
-        # GH 15079
-        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharex=True)
-
-        # share x
-        assert ax1._shared_x_axes.joined(ax1, ax2)
-        assert ax2._shared_x_axes.joined(ax1, ax2)
-        assert ax3._shared_x_axes.joined(ax1, ax3)
-        assert ax3._shared_x_axes.joined(ax2, ax3)
-
-        # don't share y
-        assert not ax1._shared_y_axes.joined(ax1, ax2)
-        assert not ax2._shared_y_axes.joined(ax1, ax2)
-        assert not ax3._shared_y_axes.joined(ax1, ax3)
-        assert not ax3._shared_y_axes.joined(ax2, ax3)
-
-    @pytest.mark.slow
-    def test_axis_share_y_with_by(self):
-        # GH 15079
-        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharey=True)
-
-        # share y
-        assert ax1._shared_y_axes.joined(ax1, ax2)
-        assert ax2._shared_y_axes.joined(ax1, ax2)
-        assert ax3._shared_y_axes.joined(ax1, ax3)
-        assert ax3._shared_y_axes.joined(ax2, ax3)
-
-        # don't share x
-        assert not ax1._shared_x_axes.joined(ax1, ax2)
-        assert not ax2._shared_x_axes.joined(ax1, ax2)
-        assert not ax3._shared_x_axes.joined(ax1, ax3)
-        assert not ax3._shared_x_axes.joined(ax2, ax3)
-
-    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
-    def test_figure_shape_hist_with_by(self, figsize):
-        # GH 15079
-        axes = self.hist_df.plot.hist(column="A", by="C", figsize=figsize)
-        self._check_axes_shape(axes, axes_num=3, figsize=figsize)

From 006588eae3c8301e65d2f19ea813542a705b682c Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 21:00:16 +0200
Subject: [PATCH 118/142] flake8

---
 pandas/tests/plotting/test_hist_box_by.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/plotting/test_hist_box_by.py b/pandas/tests/plotting/test_hist_box_by.py
index 43cb02270f338..8ecfd5a467cff 100644
--- a/pandas/tests/plotting/test_hist_box_by.py
+++ b/pandas/tests/plotting/test_hist_box_by.py
@@ -90,7 +90,9 @@ def test_hist_plot_by_argument(self, by, column, titles, legends):
         # GH 15079
         axes = _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
         result_titles = [ax.get_title() for ax in axes]
-        result_legends = [[l.get_text() for l in ax.get_legend().texts] for ax in axes]
+        result_legends = [
+            [legend.get_text() for legend in ax.get_legend().texts] for ax in axes
+        ]
 
         assert result_legends == legends
         assert result_titles == titles
@@ -238,7 +240,7 @@ def test_box_plot_by_argument(self, by, column, titles, xticklabels):
         axes = _check_plot_works(self.box_df.plot.box, column=column, by=by)
         result_titles = [ax.get_title() for ax in axes]
         result_xticklabels = [
-            [i.get_text() for i in ax.get_xticklabels()] for ax in axes
+            [label.get_text() for label in ax.get_xticklabels()] for ax in axes
         ]
 
         assert result_xticklabels == xticklabels

From 4f0a1dc827d066a1a4373c8685cbe124067fd17f Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Sat, 22 May 2021 21:01:18 +0200
Subject: [PATCH 119/142] move file

---
 pandas/tests/plotting/{ => frame}/test_hist_box_by.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pandas/tests/plotting/{ => frame}/test_hist_box_by.py (100%)

diff --git a/pandas/tests/plotting/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
similarity index 100%
rename from pandas/tests/plotting/test_hist_box_by.py
rename to pandas/tests/plotting/frame/test_hist_box_by.py

From e1579e227f8cc5966e569fcf391497fd4be2eb28 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Mon, 24 May 2021 11:07:58 +0200
Subject: [PATCH 120/142] pprint label

---
 pandas/plotting/_matplotlib/boxplot.py        |  2 +-
 .../tests/plotting/frame/test_hist_box_by.py  | 36 +++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index b3526845604cb..7eac2329f38b8 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -158,7 +158,7 @@ def _make_plot(self):
                 # When `by` is assigned, the ticklabels will become unique grouped
                 # values, instead of label which is used as subtitle in this case.
                 if self.by is not None:
-                    label = self.data.columns.levels[0]
+                    label = [pprint_thing(col) for col in self.data.columns.levels[0]]
                 self._set_ticklabels(ax, label)
         else:
             y = self.data.values.T
diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index 8ecfd5a467cff..11d87fa6bcbd1 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -200,15 +200,15 @@ def setup_method(self, method):
                 ["A"],
                 [
                     [
-                        "('a', 'a')",
-                        "('a', 'b')",
-                        "('a', 'c')",
-                        "('b', 'a')",
-                        "('b', 'b')",
-                        "('b', 'c')",
-                        "('c', 'a')",
-                        "('c', 'b')",
-                        "('c', 'c')",
+                        "(a, a)",
+                        "(a, b)",
+                        "(a, c)",
+                        "(b, a)",
+                        "(b, b)",
+                        "(b, c)",
+                        "(c, a)",
+                        "(c, b)",
+                        "(c, c)",
                     ]
                 ],
             ),
@@ -219,15 +219,15 @@ def setup_method(self, method):
                 ["A", "B"],
                 [
                     [
-                        "('a', 'a')",
-                        "('a', 'b')",
-                        "('a', 'c')",
-                        "('b', 'a')",
-                        "('b', 'b')",
-                        "('b', 'c')",
-                        "('c', 'a')",
-                        "('c', 'b')",
-                        "('c', 'c')",
+                        "(a, a)",
+                        "(a, b)",
+                        "(a, c)",
+                        "(b, a)",
+                        "(b, b)",
+                        "(b, c)",
+                        "(c, a)",
+                        "(c, b)",
+                        "(c, c)",
                     ]
                 ]
                 * 2,

From e6e96d384cedb27893beb4deca5bf17b9825b197 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Tue, 29 Jun 2021 17:57:33 +0200
Subject: [PATCH 121/142] parametrize tests

---
 .../tests/plotting/frame/test_hist_box_by.py  | 46 ++++++++-----------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index 1383c113bd966..2856ab201ba06 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -123,22 +123,19 @@ def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
             )
         self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
 
-    def test_hist_plot_invalid_layout_with_by_raises(self):
+    @pytest.mark.parametrize(
+        "msg, by, layout",
+        [
+            ("larger than required size", ["C", "D"], (1, 1)),
+            (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)),
+            ("At least one dimension of layout must be positive", "C", (-1, -1)),
+        ],
+    )
+    def test_hist_plot_invalid_layout_with_by_raises(self, msg, by, layout):
         # GH 15079, test if error is raised when invalid layout is given
 
-        # layout too small for all 3 plots
-        msg = "larger than required size"
-        with pytest.raises(ValueError, match=msg):
-            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1, 1))
-
-        # invalid format for layout
-        msg = re.escape("Layout must be a tuple of (rows, columns)")
-        with pytest.raises(ValueError, match=msg):
-            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(1,))
-
-        msg = "At least one dimension of layout must be positive"
         with pytest.raises(ValueError, match=msg):
-            self.hist_df.plot.hist(column=["A", "B"], by="C", layout=(-1, -1))
+            self.hist_df.plot.hist(column=["A", "B"], by=by, layout=layout)
 
     @pytest.mark.slow
     def test_axis_share_x_with_by(self):
@@ -265,22 +262,19 @@ def test_box_plot_layout_with_by(self, by, column, layout, axes_num):
         )
         self._check_axes_shape(axes, axes_num=axes_num, layout=layout)
 
-    def test_box_plot_invalid_layout_with_by_raises(self):
+    @pytest.mark.parametrize(
+        "msg, by, layout",
+        [
+            ("larger than required size", ["C", "D"], (1, 1)),
+            (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)),
+            ("At least one dimension of layout must be positive", "C", (-1, -1)),
+        ],
+    )
+    def test_box_plot_invalid_layout_with_by_raises(self, msg, by, layout):
         # GH 15079, test if error is raised when invalid layout is given
 
-        # layout too small for all 3 plots
-        msg = "larger than required size"
-        with pytest.raises(ValueError, match=msg):
-            self.box_df.plot.box(column=["A", "B"], by=["C", "D"], layout=(1, 1))
-
-        # invalid format for layout
-        msg = re.escape("Layout must be a tuple of (rows, columns)")
-        with pytest.raises(ValueError, match=msg):
-            self.box_df.plot.box(column=["A", "B"], by="C", layout=(1,))
-
-        msg = "At least one dimension of layout must be positive"
         with pytest.raises(ValueError, match=msg):
-            self.box_df.plot.box(column=["A", "B"], by="C", layout=(-1, -1))
+            self.box_df.plot.box(column=["A", "B"], by=by, layout=layout)
 
     @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
     def test_figure_shape_hist_with_by(self, figsize):

From 52e47f1206c24df40ba625fd9165f24155071cb9 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Tue, 29 Jun 2021 19:20:35 +0200
Subject: [PATCH 122/142] Fix test

---
 pandas/plotting/_matplotlib/core.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 85e96195ba56d..517660cbd25bf 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -135,6 +135,11 @@ def __init__(
         self.data = data
         self.by = com.maybe_make_list(by)
 
+        # For `hist` plot, need to get grouped original data before `self.data` is
+        # updated later
+        if self.by and self._kind == "hist":
+            self._grouped = data.groupby(self.by)
+
         # Assign the rest of columns into self.columns if by is explicitly defined
         # while column is not, so as to keep the same behaviour with current df.hist
         # or df.boxplot.
@@ -296,7 +301,7 @@ def nseries(self) -> int:
         if self.data.ndim == 1:
             return 1
         elif self.by and self._kind == "hist":
-            return len(self.data.groupby(self.by))
+            return len(self._grouped)
         elif self.by and self._kind == "box":
             return len(self.columns)
         else:

From bc2f2821d414aec21c5283de3f7f1f8bb8fa4a44 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 20:22:57 +0200
Subject: [PATCH 123/142] Code changes based on Marc reviews

---
 doc/source/whatsnew/v1.3.0.rst                  |  1 -
 doc/source/whatsnew/v1.4.0.rst                  |  2 +-
 pandas/plotting/_core.py                        |  4 +++-
 pandas/plotting/_matplotlib/boxplot.py          | 14 ++++++++------
 pandas/plotting/_matplotlib/core.py             | 16 ++++++++--------
 pandas/plotting/_matplotlib/groupby.py          | 14 +++++++-------
 pandas/plotting/_matplotlib/hist.py             |  7 +++----
 pandas/tests/plotting/frame/test_hist_box_by.py |  6 +++---
 8 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 2f573b6f16141..60dc7096c9d1e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -275,7 +275,6 @@ Other enhancements
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
-- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
 - Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
 - :meth:`Series.between` can now accept ``left`` or ``right`` as arguments to ``inclusive`` to include only the left or right boundary (:issue:`40245`)
 - :meth:`DataFrame.explode` now supports exploding multiple columns. Its ``column`` argument now also accepts a list of str or tuples for exploding on multiple columns at the same time (:issue:`39240`)
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 81545ada63ce5..d652097eca22f 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -29,7 +29,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
--
+- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`, :issue:`28373`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 1ad08ba021392..c8b1984a2f77a 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1237,6 +1237,8 @@ def box(self, by=None, **kwargs):
         ----------
         by : str or sequence
             Column in the DataFrame to group by.
+
+            .. versionchanged:: 1.4.0
         **kwargs
             Additional keywords are documented in
             :meth:`DataFrame.plot`.
@@ -1279,7 +1281,7 @@ def hist(self, by=None, bins=10, **kwargs):
         by : str or sequence, optional
             Column in the DataFrame to group by.
 
-            .. versionadded:: 1.3.0
+            .. versionadded:: 1.4.0
 
         bins : int, default 10
             Number of histogram bins to be used.
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index 766ce47fbcf6d..25986af0942a7 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -137,19 +137,19 @@ def _make_plot(self):
             self._return_obj = pd.Series(dtype=object)
 
             # Re-create iterated data if `by` is assigned by users
-            if self.by is None:
-                data = self.data
-            else:
-                data = create_iter_data_given_by(self.data, self._kind)
+            data = (
+                create_iter_data_given_by(self.data, self._kind)
+                if self.by
+                else self.data
+            )
 
             for i, (label, y) in enumerate(self._iter_data(data=data)):
                 ax = self._get_ax(i)
                 kwds = self.kwds.copy()
-                ticklabels = [pprint_thing(label)]
 
                 # When by is applied, show title for subplots to know which group it is
                 # just like df.boxplot, and need to apply T on y to provide right input
-                if self.by is not None:
+                if self.by:
                     y = y.T
                     ax.set_title(pprint_thing(label))
 
@@ -158,6 +158,8 @@ def _make_plot(self):
                     ticklabels = [
                         pprint_thing(col) for col in self.data.columns.levels[0]
                     ]
+                else:
+                    ticklabels = [pprint_thing(label)]
 
                 ret, bp = self._plot(
                     ax, y, column_num=i, return_type=self.return_type, **kwds
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 517660cbd25bf..6735191354fbc 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -135,15 +135,10 @@ def __init__(
         self.data = data
         self.by = com.maybe_make_list(by)
 
-        # For `hist` plot, need to get grouped original data before `self.data` is
-        # updated later
-        if self.by and self._kind == "hist":
-            self._grouped = data.groupby(self.by)
-
         # Assign the rest of columns into self.columns if by is explicitly defined
-        # while column is not, so as to keep the same behaviour with current df.hist
-        # or df.boxplot.
-        if self.by and column is None:
+        # while column is not
+        # TODO: Might deprecate `column` argument in future PR (#28373)
+        if column is None:
             self.columns = [
                 col
                 for col in data.columns
@@ -152,6 +147,11 @@ def __init__(
         else:
             self.columns = com.maybe_make_list(column)
 
+        # For `hist` plot, need to get grouped original data before `self.data` is
+        # updated later
+        if self.by and self._kind == "hist":
+            self._grouped = data.groupby(self.by)
+
         self.kind = kind
 
         self.sort_columns = sort_columns
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 874bf9d7963cc..3d37506907236 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -57,9 +57,10 @@ def create_iter_data_given_by(
      'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
     """
 
-    # For `hist` plot, before transformation, the values in level 0 are
-    # actual subplot titles, and used for column subselection and iteration;
-    # For `box` plot, that's values in level 1
+    # For `hist` plot, before transformation, the values in level 0 are values
+    # in groups and subplot titles, and later used for column subselection and
+    # iteration; For `box` plot, values in level 1 are column names to show,
+    # and are used for iteration and as subplots titles.
     if kind == "hist":
         level = 0
     elif kind == "box":
@@ -74,11 +75,10 @@ def create_iter_data_given_by(
 
     # Select sub-columns based on the value of first level of MI
     assert isinstance(data.columns, MultiIndex)
-    cols = data.columns.levels[level]
-    iter_data = {
-        col: data.loc[:, data.columns.get_level_values(level) == col] for col in cols
+    return {
+        col: data.loc[:, data.columns.get_level_values(level) == col]
+        for col in data.columns.levels[level]
     }
-    return iter_data
 
 
 def reconstruct_data_with_by(
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 181cea57fff49..7fdf1977b8089 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -101,10 +101,9 @@ def _make_plot(self):
         stacking_id = self._get_stacking_id()
 
         # Re-create iterated data if `by` is assigned by users
-        if self.by is None:
-            data = self.data
-        else:
-            data = create_iter_data_given_by(self.data, self._kind)
+        data = (
+            create_iter_data_given_by(self.data, self._kind) if self.by else self.data
+        )
 
         for i, (label, y) in enumerate(self._iter_data(data=data)):
             ax = self._get_ax(i)
diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index 2856ab201ba06..25380fe3238cf 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -13,7 +13,7 @@
 )
 
 
-def create_hist_box_with_by_df():
+def _create_hist_box_with_by_df():
     np.random.seed(0)
     df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
     df["C"] = np.random.choice(["a", "b", "c"], 30)
@@ -28,7 +28,7 @@ def setup_method(self, method):
         import matplotlib as mpl
 
         mpl.rcdefaults()
-        self.hist_df = create_hist_box_with_by_df()
+        self.hist_df = _create_hist_box_with_by_df()
 
     @pytest.mark.parametrize(
         "by, column, titles, legends",
@@ -185,7 +185,7 @@ def setup_method(self, method):
         import matplotlib as mpl
 
         mpl.rcdefaults()
-        self.box_df = create_hist_box_with_by_df()
+        self.box_df = _create_hist_box_with_by_df()
 
     @pytest.mark.parametrize(
         "by, column, titles, xticklabels",

From ceeb3c5ee04c8ce992a29e97dc33d5c1826e0a33 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 20:24:44 +0200
Subject: [PATCH 124/142] update doc

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index d652097eca22f..8834e90a8b4b2 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -29,7 +29,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
-- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`, :issue:`28373`)
+- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
 -
 
 .. ---------------------------------------------------------------------------

From 4fea841d3c4c536f559f7806c6816e9904d4c919 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 20:32:44 +0200
Subject: [PATCH 125/142] version change

---
 pandas/plotting/_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index c8b1984a2f77a..1834aa4ac4013 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1238,7 +1238,7 @@ def box(self, by=None, **kwargs):
         by : str or sequence
             Column in the DataFrame to group by.
 
-            .. versionchanged:: 1.4.0
+            .. versionadded:: 1.4.0
         **kwargs
             Additional keywords are documented in
             :meth:`DataFrame.plot`.

From 3ea2603913fca84255f12a656be7b8c9b177a6de Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:03:01 +0200
Subject: [PATCH 126/142] Use self.by

---
 pandas/plotting/_matplotlib/core.py    | 2 +-
 pandas/plotting/_matplotlib/groupby.py | 2 +-
 pandas/plotting/_matplotlib/hist.py    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 6735191354fbc..df112fa39f72c 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -451,7 +451,7 @@ def _compute_plot_data(self):
             data = data.to_frame(name=label)
 
         # GH15079 reconstruct data if by is defined
-        if self.by is not None:
+        if self.by:
             self.subplots = True
             data = reconstruct_data_with_by(self.data, by=self.by, cols=self.columns)
 
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 3d37506907236..a8853a66080cf 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -132,6 +132,6 @@ def reformat_hist_y_given_by(
     If by is None, input y is 1-d with NaN removed; and if by is not None, groupby
     will take place and input y is multi-dimensional array.
     """
-    if by is not None and len(y.shape) > 1:
+    if by and len(y.shape) > 1:
         return np.array([remove_na_arraylike(col) for col in y.T]).T
     return remove_na_arraylike(y)
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 7fdf1977b8089..961b13083293d 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -122,7 +122,7 @@ def _make_plot(self):
 
             # the bins is multi-dimension array now and each plot need only 1-d and
             # when by is applied, label should be columns that are grouped
-            if self.by is not None:
+            if self.by:
                 kwds["bins"] = kwds["bins"][i]
                 kwds["label"] = self.columns
                 kwds.pop("color")
@@ -139,7 +139,7 @@ def _make_plot(self):
             artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
 
             # when by is applied, show title for subplots to know which group it is
-            if self.by is not None:
+            if self.by:
                 ax.set_title(pprint_thing(label))
 
             self._append_legend_handles_labels(artists[0], label)

From 9f4813943ae2dd7adb60a8ade1cf95d786c8e012 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:05:31 +0200
Subject: [PATCH 127/142] better code

---
 pandas/plotting/_matplotlib/hist.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 961b13083293d..28cdf2bd57adb 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -53,12 +53,11 @@ def _args_adjust(self):
         # calculate bin number separately in different subplots
         # where subplots are created based on by argument
         if is_integer(self.bins):
-            if self.by is None:
-                self.bins = self._calculate_bins(self.data)
-
-            else:
+            if self.by:
                 grouped = self.data.groupby(self.by)[self.columns]
                 self.bins = [self._calculate_bins(group) for key, group in grouped]
+            else:
+                self.bins = self._calculate_bins(self.data)
 
         if is_list_like(self.bottom):
             self.bottom = np.array(self.bottom)

From b1094e35ab9a944c526338f0b9077b78600a7465 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:07:58 +0200
Subject: [PATCH 128/142] better inline comment

---
 pandas/plotting/_matplotlib/groupby.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index a8853a66080cf..558ce3575c07a 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -73,7 +73,8 @@ def create_iter_data_given_by(
 
     iter_data: Dict[str, FrameOrSeriesUnion]
 
-    # Select sub-columns based on the value of first level of MI
+    # Select sub-columns based on the value of level of MI, and if `by` is
+    # assigned, data must be a MI DataFrame
     assert isinstance(data.columns, MultiIndex)
     return {
         col: data.loc[:, data.columns.get_level_values(level) == col]

From 97bde5959803a1a5541bf413dbf0b9bc16247524 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:11:25 +0200
Subject: [PATCH 129/142] code changes based on Marc reviews

---
 pandas/plotting/_matplotlib/core.py    | 6 +++---
 pandas/plotting/_matplotlib/groupby.py | 4 +---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index df112fa39f72c..0a4cbf70245ed 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -138,14 +138,14 @@ def __init__(
         # Assign the rest of columns into self.columns if by is explicitly defined
         # while column is not
         # TODO: Might deprecate `column` argument in future PR (#28373)
-        if column is None:
+        if column:
+            self.columns = com.maybe_make_list(column)
+        else:
             self.columns = [
                 col
                 for col in data.columns
                 if col not in self.by and is_numeric_dtype(data[col])
             ]
-        else:
-            self.columns = com.maybe_make_list(column)
 
         # For `hist` plot, need to get grouped original data before `self.data` is
         # updated later
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 558ce3575c07a..69bb335445433 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -71,8 +71,6 @@ def create_iter_data_given_by(
             f"kind 'hist' and 'box' plots, but used with '{kind}'"
         )
 
-    iter_data: Dict[str, FrameOrSeriesUnion]
-
     # Select sub-columns based on the value of level of MI, and if `by` is
     # assigned, data must be a MI DataFrame
     assert isinstance(data.columns, MultiIndex)
@@ -126,7 +124,7 @@ def reconstruct_data_with_by(
 
 
 def reformat_hist_y_given_by(
-    y: Union[Series, np.ndarray], by: Optional[IndexLabel] = None
+    y: Union[Series, np.ndarray], by: IndexLabel | None = None
 ) -> Union[Series, np.ndarray]:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 

From 444a964f883949da5ea60747374cd2406741063e Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:19:21 +0200
Subject: [PATCH 130/142] minor fix

---
 pandas/plotting/_matplotlib/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 69bb335445433..1af8257c8f8f9 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,6 +1,5 @@
 from typing import (
     Dict,
-    Optional,
     Union,
 )
 

From b66dad0e0f02462370d45a3c6ce6b61ac055dc55 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:50:28 +0200
Subject: [PATCH 131/142] mypy

---
 pandas/plotting/_matplotlib/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 1af8257c8f8f9..d46a57a935ee7 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -123,7 +123,7 @@ def reconstruct_data_with_by(
 
 
 def reformat_hist_y_given_by(
-    y: Union[Series, np.ndarray], by: IndexLabel | None = None
+    y: Union[Series, np.ndarray], by: IndexLabel | None
 ) -> Union[Series, np.ndarray]:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 

From 982f56237d1b19db078b3213a0a1a84b54f858d9 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 21:57:09 +0200
Subject: [PATCH 132/142] add future annotation

---
 pandas/plotting/_matplotlib/groupby.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index d46a57a935ee7..24ebaba943416 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import (
     Dict,
     Union,

From c76ad67431b073c05abfe3f0a42ea62b71942eb6 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 22:12:00 +0200
Subject: [PATCH 133/142] fix pre commit

---
 pandas/plotting/_matplotlib/groupby.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 24ebaba943416..0c37f6092b63a 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -1,10 +1,5 @@
 from __future__ import annotations
 
-from typing import (
-    Dict,
-    Union,
-)
-
 import numpy as np
 
 from pandas._typing import (
@@ -24,7 +19,7 @@
 
 def create_iter_data_given_by(
     data: DataFrame, kind: str = "hist"
-) -> Dict[str, FrameOrSeriesUnion]:
+) -> dict[str, FrameOrSeriesUnion]:
     """
     Create data for iteration given `by` is assigned or not, and it is only
     used in both hist and boxplot.
@@ -125,8 +120,8 @@ def reconstruct_data_with_by(
 
 
 def reformat_hist_y_given_by(
-    y: Union[Series, np.ndarray], by: IndexLabel | None
-) -> Union[Series, np.ndarray]:
+    y: Series | np.ndarray, by: IndexLabel | None
+) -> Series | np.ndarray:
     """Internal function to reformat y given `by` is applied or not for hist plot.
 
     If by is None, input y is 1-d with NaN removed; and if by is not None, groupby

From 2c1aa33876701059b9a9e873e170ff7e81d61265 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 22:36:49 +0200
Subject: [PATCH 134/142] minor experimental fix

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 0a4cbf70245ed..b34f88d975abd 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -144,7 +144,7 @@ def __init__(
             self.columns = [
                 col
                 for col in data.columns
-                if col not in self.by and is_numeric_dtype(data[col])
+                if self.by and col not in self.by and is_numeric_dtype(data[col])
             ]
 
         # For `hist` plot, need to get grouped original data before `self.data` is

From 68965463e572c070213082d40e38c479f79579d5 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Wed, 30 Jun 2021 22:42:35 +0200
Subject: [PATCH 135/142] better doc string

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index b34f88d975abd..4949c765dd398 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -136,7 +136,7 @@ def __init__(
         self.by = com.maybe_make_list(by)
 
         # Assign the rest of columns into self.columns if by is explicitly defined
-        # while column is not
+        # while column is not, only need `columns` in hist/box plot.
         # TODO: Might deprecate `column` argument in future PR (#28373)
         if column:
             self.columns = com.maybe_make_list(column)

From 3c5430249346006683d068428d00dc6d2318b339 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Thu, 1 Jul 2021 07:19:25 +0200
Subject: [PATCH 136/142] fixup doc fail

---
 pandas/plotting/_matplotlib/core.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 4949c765dd398..20701f92a690d 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -39,6 +39,7 @@
 )
 
 import pandas.core.common as com
+from pandas.core.frame import DataFrame
 
 from pandas.io.formats.printing import pprint_thing
 from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
@@ -136,16 +137,17 @@ def __init__(
         self.by = com.maybe_make_list(by)
 
         # Assign the rest of columns into self.columns if by is explicitly defined
-        # while column is not, only need `columns` in hist/box plot.
+        # while column is not, only need `columns` in hist/box plot when it's DF
         # TODO: Might deprecate `column` argument in future PR (#28373)
-        if column:
-            self.columns = com.maybe_make_list(column)
-        else:
-            self.columns = [
-                col
-                for col in data.columns
-                if self.by and col not in self.by and is_numeric_dtype(data[col])
-            ]
+        if isinstance(data, DataFrame):
+            if column:
+                self.columns = com.maybe_make_list(column)
+            else:
+                self.columns = [
+                    col
+                    for col in data.columns
+                    if self.by and col not in self.by and is_numeric_dtype(data[col])
+                ]
 
         # For `hist` plot, need to get grouped original data before `self.data` is
         # updated later

From 2d20178acebe9854ef9d4b80ecca397476a36a1b Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Thu, 1 Jul 2021 10:26:27 +0200
Subject: [PATCH 137/142] code change on Macro reviews

---
 pandas/plotting/_matplotlib/core.py           | 18 +++++--
 .../tests/plotting/frame/test_hist_box_by.py  | 50 +++++++++++++++++++
 2 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 20701f92a690d..973a127f0c801 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -143,11 +143,16 @@ def __init__(
             if column:
                 self.columns = com.maybe_make_list(column)
             else:
-                self.columns = [
-                    col
-                    for col in data.columns
-                    if self.by and col not in self.by and is_numeric_dtype(data[col])
-                ]
+                if self.by:
+                    self.columns = [
+                        col
+                        for col in data.columns
+                        if col not in self.by and is_numeric_dtype(data[col])
+                    ]
+                else:
+                    self.columns = [
+                        col for col in data.columns if is_numeric_dtype(data[col])
+                    ]
 
         # For `hist` plot, need to get grouped original data before `self.data` is
         # updated later
@@ -451,6 +456,9 @@ def _compute_plot_data(self):
             if label is None and data.name is None:
                 label = "None"
             data = data.to_frame(name=label)
+        else:
+            cols = self.columns if self.by is None else self.columns + self.by
+            data = data.loc[:, cols]
 
         # GH15079 reconstruct data if by is defined
         if self.by:
diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index 25380fe3238cf..a3014a9b3b5bb 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -97,6 +97,31 @@ def test_hist_plot_by_argument(self, by, column, titles, legends):
         assert result_legends == legends
         assert result_titles == titles
 
+    @pytest.mark.parametrize(
+        "by, column, legends, title",
+        [
+            ([], ["A"], ["A"], None),
+            (None, "A", ["A"], "hist A"),
+            ([], ["A", "B"], ["A", "B"], "hist A and B"),
+            (None, ["A", "B"], ["A", "B"], "hist A and B"),
+        ],
+    )
+    def test_hist_plot_with_none_empty_list_by(self, by, column, legends, title):
+        # GH 15079
+        axes = _check_plot_works(
+            self.hist_df.plot.hist, column=column, by=by, title=title
+        )
+        result_titles = axes.get_title()
+        result_legends = [legend.get_text() for legend in axes.get_legend().texts]
+
+        assert result_legends == legends
+
+        # Should be no title if it is not subplots
+        if title is None:
+            assert result_titles == ""
+        else:
+            assert result_titles == title
+
     @pytest.mark.slow
     @pytest.mark.parametrize(
         "by, column, layout, axes_num",
@@ -243,6 +268,31 @@ def test_box_plot_by_argument(self, by, column, titles, xticklabels):
         assert result_xticklabels == xticklabels
         assert result_titles == titles
 
+    @pytest.mark.parametrize(
+        "by, column, xticklabels, title",
+        [
+            ([], ["A"], ["A"], None),
+            (None, "A", ["A"], "box A"),
+            ([], ["A", "B"], ["A", "B"], "box A and B"),
+            (None, ["A", "B"], ["A", "B"], "box A and B"),
+        ],
+    )
+    def test_box_plot_with_none_empty_list_by(self, by, column, xticklabels, title):
+        # GH 15079
+        axes = _check_plot_works(
+            self.box_df.plot.box, column=column, by=by, title=title
+        )
+        result_titles = axes.get_title()
+        result_legends = [xtick.get_text() for xtick in axes.get_xticklabels()]
+
+        assert result_legends == xticklabels
+
+        # Should be no title if it is not subplots
+        if title is None:
+            assert result_titles == ""
+        else:
+            assert result_titles == title
+
     @pytest.mark.slow
     @pytest.mark.parametrize(
         "by, column, layout, axes_num",

From a169dfd3887175fc28cf8801aa6c054cc5e0eacb Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Thu, 1 Jul 2021 10:32:12 +0200
Subject: [PATCH 138/142] Add more tests

---
 pandas/tests/plotting/frame/test_hist_box_by.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index a3014a9b3b5bb..a0e32ee14650e 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -103,6 +103,7 @@ def test_hist_plot_by_argument(self, by, column, titles, legends):
             ([], ["A"], ["A"], None),
             (None, "A", ["A"], "hist A"),
             ([], ["A", "B"], ["A", "B"], "hist A and B"),
+            ([], None, ["A", "B"], "hist A and B"),
             (None, ["A", "B"], ["A", "B"], "hist A and B"),
         ],
     )
@@ -274,6 +275,7 @@ def test_box_plot_by_argument(self, by, column, titles, xticklabels):
             ([], ["A"], ["A"], None),
             (None, "A", ["A"], "box A"),
             ([], ["A", "B"], ["A", "B"], "box A and B"),
+            ([], None, ["A", "B"], "box A and B"),
             (None, ["A", "B"], ["A", "B"], "box A and B"),
         ],
     )

From d0b56ff1b63e3400b17b4e922b4261e89e72f6dd Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Thu, 1 Jul 2021 11:28:27 +0200
Subject: [PATCH 139/142] fixup

---
 pandas/plotting/_matplotlib/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 973a127f0c801..64b3c60f308a5 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -456,7 +456,7 @@ def _compute_plot_data(self):
             if label is None and data.name is None:
                 label = "None"
             data = data.to_frame(name=label)
-        else:
+        elif self._kind in ("hist", "box"):
             cols = self.columns if self.by is None else self.columns + self.by
             data = data.loc[:, cols]
 

From 143f286326731cd06ec16ff96fac89fe14dafcf0 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Mon, 12 Jul 2021 08:13:32 +0200
Subject: [PATCH 140/142] changes based on Jeff review

---
 pandas/plotting/_matplotlib/core.py           |  6 +-
 .../tests/plotting/frame/test_hist_box_by.py  | 58 ++++++-------------
 2 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 9ab513d4383d2..85b00d4f2852c 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -135,10 +135,10 @@ def __init__(
 
         self.data = data
 
-        # if users assign an empty list or tuple, treat them as None
-        # then no group-by will be conducted.
+        # if users assign an empty list or tuple, raise `ValueError`
+        # similar to current `df.box` and `df.hist` APIs.
         if by in ([], ()):
-            by = None
+            raise ValueError("No group keys passed!")
         self.by = com.maybe_make_list(by)
 
         # Assign the rest of columns into self.columns if by is explicitly defined
diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index e19bbeedbf22b..339120a503411 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -135,30 +135,19 @@ def test_hist_plot_by_0(self, by, column, titles, legends):
         assert result_titles == titles
 
     @pytest.mark.parametrize(
-        "by, column, legends, title",
+        "by, column",
         [
-            ([], ["A"], ["A"], None),
-            (None, "A", ["A"], "hist A"),
-            ([], ["A", "B"], ["A", "B"], "hist A and B"),
-            ([], None, ["A", "B"], "hist A and B"),
-            (None, ["A", "B"], ["A", "B"], "hist A and B"),
+            ([], ["A"]),
+            ([], ["A", "B"]),
+            ((), None),
+            ((), ["A", "B"]),
         ],
     )
-    def test_hist_plot_with_none_empty_list_by(self, by, column, legends, title):
+    def test_hist_plot_empty_list_string_tuple_by(self, by, column):
         # GH 15079
-        axes = _check_plot_works(
-            self.hist_df.plot.hist, column=column, by=by, title=title
-        )
-        result_titles = axes.get_title()
-        result_legends = [legend.get_text() for legend in axes.get_legend().texts]
-
-        assert result_legends == legends
-
-        # Should be no title if it is not subplots
-        if title is None:
-            assert result_titles == ""
-        else:
-            assert result_titles == title
+        msg = "No group keys passed"
+        with pytest.raises(ValueError, match=msg):
+            axes = _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
 
     @pytest.mark.slow
     @pytest.mark.parametrize(
@@ -346,30 +335,19 @@ def test_box_plot_by_0(self, by, column, titles, xticklabels):
         assert result_titles == titles
 
     @pytest.mark.parametrize(
-        "by, column, xticklabels, title",
+        "by, column",
         [
-            ([], ["A"], ["A"], None),
-            (None, "A", ["A"], "box A"),
-            ([], ["A", "B"], ["A", "B"], "box A and B"),
-            ([], None, ["A", "B"], "box A and B"),
-            (None, ["A", "B"], ["A", "B"], "box A and B"),
+            ([], ["A"]),
+            ((), "A"),
+            ([], None),
+            ((), ["A", "B"]),
         ],
     )
-    def test_box_plot_with_none_empty_list_by(self, by, column, xticklabels, title):
+    def test_box_plot_with_none_empty_list_by(self, by, column):
         # GH 15079
-        axes = _check_plot_works(
-            self.box_df.plot.box, column=column, by=by, title=title
-        )
-        result_titles = axes.get_title()
-        result_legends = [xtick.get_text() for xtick in axes.get_xticklabels()]
-
-        assert result_legends == xticklabels
-
-        # Should be no title if it is not subplots
-        if title is None:
-            assert result_titles == ""
-        else:
-            assert result_titles == title
+        msg = "No group keys passed"
+        with pytest.raises(ValueError, match=msg):
+            axes = _check_plot_works(self.box_df.plot.box, column=column, by=by)
 
     @pytest.mark.slow
     @pytest.mark.parametrize(

From 283286fe356212c42e45ba1028a4b5c2f6a842b4 Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Mon, 12 Jul 2021 08:14:54 +0200
Subject: [PATCH 141/142] doc

---
 pandas/plotting/_core.py               | 9 +++++++--
 pandas/plotting/_matplotlib/groupby.py | 7 +------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index bd1b8547102b7..e7d1ce869f511 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1238,7 +1238,10 @@ def box(self, by=None, **kwargs):
         by : str or sequence
             Column in the DataFrame to group by.
 
-            .. versionadded:: 1.4.0
+            .. versionchanged:: 1.4.0
+
+               Previously, `by` was silently ignored and made no groupings
+
         **kwargs
             Additional keywords are documented in
             :meth:`DataFrame.plot`.
@@ -1281,7 +1284,9 @@ def hist(self, by=None, bins=10, **kwargs):
         by : str or sequence, optional
             Column in the DataFrame to group by.
 
-            .. versionadded:: 1.4.0
+            .. versionchanged:: 1.4.0
+
+               Previously, `by` was silently ignored and made no groupings
 
         bins : int, default 10
             Number of histogram bins to be used.
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
index 1c144d916ec9c..37cc3186fe097 100644
--- a/pandas/plotting/_matplotlib/groupby.py
+++ b/pandas/plotting/_matplotlib/groupby.py
@@ -59,13 +59,8 @@ def create_iter_data_given_by(
     # and are used for iteration and as subplots titles.
     if kind == "hist":
         level = 0
-    elif kind == "box":
-        level = 1
     else:
-        raise ValueError(
-            f"create_iter_data_given_by can only be used with "
-            f"kind 'hist' and 'box' plots, but used with '{kind}'"
-        )
+        level = 1
 
     # Select sub-columns based on the value of level of MI, and if `by` is
     # assigned, data must be a MI DataFrame

From f1aeee0d2725656a7f5a929f4dd8c63783b6ed8d Mon Sep 17 00:00:00 2001
From: kaiqi Dong <kaiqidong1991@gmail.com>
Date: Mon, 12 Jul 2021 08:22:08 +0200
Subject: [PATCH 142/142] fix flake8

---
 pandas/tests/plotting/frame/test_hist_box_by.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
index 339120a503411..ba6d232733762 100644
--- a/pandas/tests/plotting/frame/test_hist_box_by.py
+++ b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -147,7 +147,7 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column):
         # GH 15079
         msg = "No group keys passed"
         with pytest.raises(ValueError, match=msg):
-            axes = _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
+            _check_plot_works(self.hist_df.plot.hist, column=column, by=by)
 
     @pytest.mark.slow
     @pytest.mark.parametrize(
@@ -347,7 +347,7 @@ def test_box_plot_with_none_empty_list_by(self, by, column):
         # GH 15079
         msg = "No group keys passed"
         with pytest.raises(ValueError, match=msg):
-            axes = _check_plot_works(self.box_df.plot.box, column=column, by=by)
+            _check_plot_works(self.box_df.plot.box, column=column, by=by)
 
     @pytest.mark.slow
     @pytest.mark.parametrize(