diff --git a/doc/source/conf.py b/doc/source/conf.py index 117aa1724c4f2..4f01fe4f4b278 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -277,6 +277,7 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { 'statsmodels': ('http://statsmodels.sourceforge.net/devel/', None), + 'matplotlib': ('http://matplotlib.org/', None), 'python': ('http://docs.python.org/', None) } import glob diff --git a/doc/source/release.rst b/doc/source/release.rst index 9bfc3609f5b6d..58754e62610c6 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -58,6 +58,7 @@ New features ``DataFrame(dict)`` and ``Series(dict)`` create ``MultiIndex`` columns and index where applicable (:issue:`4187`) - Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`) +- Pie plots from ``Series.plot`` and ``DataFrame.plot`` with ``kind='pie'`` (:issue:`6976`) - Added the ``sym_diff`` method to ``Index`` (:issue:`5543`) - Added ``to_julian_date`` to ``TimeStamp`` and ``DatetimeIndex``. The Julian Date is used primarily in astronomy and represents the number of days from diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index c70e32fd18694..16b63ec374fa2 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -364,7 +364,8 @@ Plotting ~~~~~~~~ - Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`), See :ref:`the docs`. -- ``DataFrame.plot`` and ``Series.plot`` now supports area plot with specifying ``kind='area'`` (:issue:`6656`) +- ``DataFrame.plot`` and ``Series.plot`` now support area plots by specifying ``kind='area'`` (:issue:`6656`), See :ref:`the docs`. +- Pie plots from ``Series.plot`` and ``DataFrame.plot`` with ``kind='pie'`` (:issue:`6976`), See :ref:`the docs <visualization.pie>`. - Plotting with Error Bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects (:issue:`3796`, :issue:`6834`), See :ref:`the docs`.
- ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``, See :ref:`the docs`. - ``plot(legend='reverse')`` will now reverse the order of legend labels for diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 8906e82eb937b..255acad7f927b 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -588,6 +588,80 @@ given by column ``z``. The bins are aggregated with numpy's ``max`` function. See the `matplotlib hexbin documenation `__ for more. +.. _visualization.pie: + +Pie plot +~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.14 + +You can create a pie plot with ``DataFrame.plot`` or ``Series.plot`` with ``kind='pie'``. +If the data includes ``NaN``, it will be automatically filled with 0. +If the data contains any negative value, a ``ValueError`` will be raised. + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + series = Series(3 * rand(4), index=['a', 'b', 'c', 'd'], name='series') + + @savefig series_pie_plot.png + series.plot(kind='pie') + +Note that a pie plot with ``DataFrame`` requires that you either specify a target column by the ``y`` +argument or ``subplots=True``. When ``y`` is specified, a pie plot of the selected column +will be drawn. If ``subplots=True`` is specified, pie plots for each column are drawn as subplots. +A legend will be drawn in each pie plot by default; specify ``legend=False`` to hide it. + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + df = DataFrame(3 * rand(4, 2), index=['a', 'b', 'c', 'd'], columns=['x', 'y']) + + @savefig df_pie_plot.png + df.plot(kind='pie', subplots=True) + +You can use the ``labels`` and ``colors`` keywords to specify the labels and colors of each wedge +(you cannot use ``label`` and ``color``, because of matplotlib's specification). +If you want to hide wedge labels, specify ``labels=None``. +If ``fontsize`` is specified, the value will be applied to wedge labels.
+Also, other keywords supported by :func:`matplotlib.pyplot.pie` can be used. + + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + @savefig series_pie_plot_options.png + series.plot(kind='pie', labels=['AA', 'BB', 'CC', 'DD'], colors=['r', 'g', 'b', 'c'], + autopct='%.2f', fontsize=20) + +If you pass values whose sum total is less than 1.0, matplotlib draws a semicircle. + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + series = Series([0.1] * 4, index=['a', 'b', 'c', 'd'], name='series2') + + @savefig series_pie_plot_semi.png + series.plot(kind='pie') + +See the `matplotlib pie documentation <http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.pie>`__ for more. + .. _visualization.andrews_curves: Andrews Curves diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 0186ac4c2b74b..829b2b296155f 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -2,6 +2,7 @@ # coding: utf-8 import nose +import itertools import os import string from distutils.version import LooseVersion @@ -138,6 +139,63 @@ def test_irregular_datetime(self): ax.set_xlim('1/1/1999', '1/1/2001') self.assertEqual(xp, ax.get_xlim()[0]) + @slow + def test_pie_series(self): + # if sum of values is less than 1.0, pie handle them as rate and draw semicircle.
+ series = Series(np.random.randint(1, 5), + index=['a', 'b', 'c', 'd', 'e'], name='YLABEL') + ax = _check_plot_works(series.plot, kind='pie') + for t, expected in zip(ax.texts, series.index): + self.assertEqual(t.get_text(), expected) + self.assertEqual(ax.get_ylabel(), 'YLABEL') + + # without wedge labels + ax = _check_plot_works(series.plot, kind='pie', labels=None) + for t, expected in zip(ax.texts, [''] * 5): + self.assertEqual(t.get_text(), expected) + + # with less colors than elements + color_args = ['r', 'g', 'b'] + ax = _check_plot_works(series.plot, kind='pie', colors=color_args) + + import matplotlib.colors as colors + conv = colors.colorConverter + color_expected = ['r', 'g', 'b', 'r', 'g'] + for p, expected in zip(ax.patches, color_expected): + self.assertEqual(p.get_facecolor(), conv.to_rgba(expected)) + + # with labels and colors + labels = ['A', 'B', 'C', 'D', 'E'] + color_args = ['r', 'g', 'b', 'c', 'm'] + ax = _check_plot_works(series.plot, kind='pie', labels=labels, colors=color_args) + + for t, expected in zip(ax.texts, labels): + self.assertEqual(t.get_text(), expected) + for p, expected in zip(ax.patches, color_args): + self.assertEqual(p.get_facecolor(), conv.to_rgba(expected)) + + # with autopct and fontsize + ax = _check_plot_works(series.plot, kind='pie', colors=color_args, + autopct='%.2f', fontsize=7) + pcts = ['{0:.2f}'.format(s * 100) for s in series.values / float(series.sum())] + iters = [iter(series.index), iter(pcts)] + expected_texts = list(it.next() for it in itertools.cycle(iters)) + for t, expected in zip(ax.texts, expected_texts): + self.assertEqual(t.get_text(), expected) + self.assertEqual(t.get_fontsize(), 7) + + # includes negative value + with tm.assertRaises(ValueError): + series = Series([1, 2, 0, 4, -1], index=['a', 'b', 'c', 'd', 'e']) + series.plot(kind='pie') + + # includes nan + series = Series([1, 2, np.nan, 4], + index=['a', 'b', 'c', 'd'], name='YLABEL') + ax = _check_plot_works(series.plot, kind='pie') + for 
t, expected in zip(ax.texts, series.index): + self.assertEqual(t.get_text(), expected) + @slow def test_hist(self): _check_plot_works(self.ts.hist) @@ -1511,6 +1569,39 @@ def test_allow_cmap(self): df.plot(kind='hexbin', x='A', y='B', cmap='YlGn', colormap='BuGn') + @slow + def test_pie_df(self): + df = DataFrame(np.random.rand(5, 3), columns=['X', 'Y', 'Z'], + index=['a', 'b', 'c', 'd', 'e']) + with tm.assertRaises(ValueError): + df.plot(kind='pie') + + ax = _check_plot_works(df.plot, kind='pie', y='Y') + for t, expected in zip(ax.texts, df.index): + self.assertEqual(t.get_text(), expected) + + axes = _check_plot_works(df.plot, kind='pie', subplots=True) + self.assertEqual(len(axes), len(df.columns)) + for ax in axes: + for t, expected in zip(ax.texts, df.index): + self.assertEqual(t.get_text(), expected) + for ax, ylabel in zip(axes, df.columns): + self.assertEqual(ax.get_ylabel(), ylabel) + + labels = ['A', 'B', 'C', 'D', 'E'] + color_args = ['r', 'g', 'b', 'c', 'm'] + axes = _check_plot_works(df.plot, kind='pie', subplots=True, + labels=labels, colors=color_args) + self.assertEqual(len(axes), len(df.columns)) + + import matplotlib.colors as colors + conv = colors.colorConverter + for ax in axes: + for t, expected in zip(ax.texts, labels): + self.assertEqual(t.get_text(), expected) + for p, expected in zip(ax.patches, color_args): + self.assertEqual(p.get_facecolor(), conv.to_rgba(expected)) + def test_errorbar_plot(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} @@ -1918,6 +2009,7 @@ def _check_plot_works(f, *args, **kwargs): plt.savefig(path) finally: tm.close(fig) + return ret diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 55aa01fd2e265..4453b1db359e9 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1251,16 +1251,17 @@ def _get_style(self, i, col_name): return style or None - def _get_colors(self): + def _get_colors(self, num_colors=None, color_kwds='color'): from pandas.core.frame import DataFrame - 
if isinstance(self.data, DataFrame): - num_colors = len(self.data.columns) - else: - num_colors = 1 + if num_colors is None: + if isinstance(self.data, DataFrame): + num_colors = len(self.data.columns) + else: + num_colors = 1 return _get_standard_colors(num_colors=num_colors, colormap=self.colormap, - color=self.kwds.get('color')) + color=self.kwds.get(color_kwds)) def _maybe_add_color(self, colors, kwds, style, i): has_color = 'color' in kwds or self.colormap is not None @@ -1939,6 +1940,63 @@ def _post_plot_logic(self): # self.axes[0].legend(loc='best') +class PiePlot(MPLPlot): + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError("{0} doesn't allow negative values".format(kind)) + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _get_layout(self): + from pandas import DataFrame + if isinstance(self.data, DataFrame): + return (1, len(self.data.columns)) + else: + return (1, 1) + + def _validate_color_args(self): + pass + + def _make_plot(self): + self.kwds.setdefault('colors', self._get_colors(num_colors=len(self.data), + color_kwds='colors')) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = com.pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + idx = [com.pprint_thing(v) for v in self.data.index] + labels = kwds.pop('labels', idx) + # labels is used for each wedge's labels + results = ax.pie(y, labels=labels, **kwds) + + if kwds.get('autopct', None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + 
self._add_legend_handle(p, l) + + class BoxPlot(MPLPlot): pass @@ -1950,12 +2008,14 @@ class HistPlot(MPLPlot): _common_kinds = ['line', 'bar', 'barh', 'kde', 'density', 'area'] # kinds supported by dataframe _dataframe_kinds = ['scatter', 'hexbin'] -_all_kinds = _common_kinds + _dataframe_kinds +# kinds supported only by series or dataframe single column +_series_kinds = ['pie'] +_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds _plot_klass = {'line': LinePlot, 'bar': BarPlot, 'barh': BarPlot, 'kde': KdePlot, 'scatter': ScatterPlot, 'hexbin': HexBinPlot, - 'area': AreaPlot} + 'area': AreaPlot, 'pie': PiePlot} def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True, @@ -2054,7 +2114,7 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True, """ kind = _get_standard_kind(kind.lower().strip()) - if kind in _dataframe_kinds or kind in _common_kinds: + if kind in _all_kinds: klass = _plot_klass[kind] else: raise ValueError('Invalid chart type given %s' % kind) @@ -2068,6 +2128,24 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True, figsize=figsize, logx=logx, logy=logy, sort_columns=sort_columns, secondary_y=secondary_y, **kwds) + elif kind in _series_kinds: + if y is None and subplots is False: + msg = "{0} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if com.is_integer(y) and not frame.columns.holds_integer(): + y = frame.columns[y] + frame = frame[y] # converted to series actually + frame.index.name = y + + plot_obj = klass(frame, kind=kind, subplots=subplots, + rot=rot,legend=legend, ax=ax, style=style, + fontsize=fontsize, use_index=use_index, sharex=sharex, + sharey=sharey, xticks=xticks, yticks=yticks, + xlim=xlim, ylim=ylim, title=title, grid=grid, + figsize=figsize, + sort_columns=sort_columns, + **kwds) else: if x is not None: if com.is_integer(x) and not frame.columns.holds_integer(): @@ -2168,7 +2246,7 @@ def plot_series(series, 
label=None, kind='line', use_index=True, rot=None, """ kind = _get_standard_kind(kind.lower().strip()) - if kind in _common_kinds: + if kind in _common_kinds or kind in _series_kinds: klass = _plot_klass[kind] else: raise ValueError('Invalid chart type given %s' % kind)