pandas-dev · jreback · May 1, 2014 · Mar 15, 2014 · TomAugspurger · Apr 28, 2014
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -190,6 +190,8 @@ API Changes
   ``data`` argument (:issue:`5357`)
 - groupby will now not return the grouped column for non-cython functions (:issue:`5610`),
   as its already the index
+- ``DataFrame.plot`` and ``Series.plot`` now support area plots via ``kind='area'`` (:issue:`6656`)
+- Line plots can be stacked by passing ``stacked=True`` (:issue:`6656`)
 
 Deprecations
 ~~~~~~~~~~~~

diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -364,10 +364,12 @@ Plotting
 ~~~~~~~~
 
 - Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`), See :ref:`the docs<visualization.hexbin>`.
+- ``DataFrame.plot`` and ``Series.plot`` now support area plots via ``kind='area'`` (:issue:`6656`)
 - Plotting with Error Bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects (:issue:`3796`), See :ref:`the docs<visualization.errorbars>`.
 - ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``, See :ref:`the docs<visualization.table>`.
 - ``plot(legend='reverse')`` will now reverse the order of legend labels for
   most plot kinds. (:issue:`6014`)
+- Line plots and area plots can be stacked by passing ``stacked=True`` (:issue:`6656`)
 
 - Following keywords are now acceptable for :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
 

diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
@@ -461,6 +461,40 @@ Finally, there is a helper function ``pandas.tools.plotting.table`` to create a
 
 **Note**: You can get table instances on the axes using ``axes.tables`` property for further decorations. See the `matplotlib table documenation <http://matplotlib.org/api/axes_api.html#matplotlib.axes.Axes.table>`__ for more.
 
+.. _visualization.area_plot:
+
+Area plot
+~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 0.14
+
+You can create area plots with ``Series.plot`` and ``DataFrame.plot`` by passing ``kind='area'``. Area plots are stacked by default. To produce a stacked area plot, each column must contain either all positive or all negative values.
+
+When the input data contains ``NaN``, it is automatically filled with 0. If you want to drop missing values or fill them with a different value, use :meth:`DataFrame.dropna` or :meth:`DataFrame.fillna` before calling ``plot``.
+
+.. ipython:: python
+   :suppress:
+
+   plt.figure();
+
+.. ipython:: python
+
+   df = DataFrame(rand(10, 4), columns=['a', 'b', 'c', 'd'])
+
+   @savefig area_plot_stacked.png
+   df.plot(kind='area');
+
+To produce an unstacked plot, pass ``stacked=False``. The alpha value is set to 0.5 unless otherwise specified:
+
+.. ipython:: python
+   :suppress:
+
+   plt.figure();
+
+.. ipython:: python
+
+   @savefig area_plot_unstacked.png
+   df.plot(kind='area', stacked=False);
 
 .. _visualization.scatter_matrix:
 

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
@@ -14,7 +14,7 @@
 
 import numpy as np
 from numpy import random
-from numpy.random import randn
+from numpy.random import rand, randn
 
 from numpy.testing import assert_array_equal
 from numpy.testing.decorators import slow
@@ -54,9 +54,10 @@ def test_plot(self):
         _check_plot_works(self.ts.plot, style='.', logx=True)
         _check_plot_works(self.ts.plot, style='.', loglog=True)
         _check_plot_works(self.ts[:10].plot, kind='bar')
+        _check_plot_works(self.ts.plot, kind='area', stacked=False)
         _check_plot_works(self.iseries.plot)
 
-        for kind in plotting._common_kinds:
+        for kind in ['line', 'bar', 'barh', 'kde']:
             _check_plot_works(self.series[:5].plot, kind=kind)
 
         _check_plot_works(self.series[:10].plot, kind='barh')
@@ -75,6 +76,33 @@ def test_plot_figsize_and_title(self):
         assert_array_equal(np.round(ax.figure.get_size_inches()),
                            np.array((16., 8.)))
 
+    def test_ts_area_lim(self):
+        ax = self.ts.plot(kind='area', stacked=False)
+        xmin, xmax = ax.get_xlim()
+        lines = ax.get_lines()
+        self.assertEqual(xmin, lines[0].get_data(orig=False)[0][0])       
+        self.assertEqual(xmax, lines[0].get_data(orig=False)[0][-1])
+
+    def test_line_area_nan_series(self):
+        values = [1, 2, np.nan, 3]
+        s = Series(values)
+        ts = Series(values, index=tm.makeDateIndex(k=4))
+
+        for d in [s, ts]:
+            ax = _check_plot_works(d.plot)
+            masked = ax.lines[0].get_ydata()
+            # remove nan for comparison purpose
+            self.assert_numpy_array_equal(np.delete(masked.data, 2), np.array([1, 2, 3]))
+            self.assert_numpy_array_equal(masked.mask, np.array([False, False, True, False]))
+
+            expected = np.array([1, 2, 0, 3])
+            ax = _check_plot_works(d.plot, stacked=True)
+            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
+            ax = _check_plot_works(d.plot, kind='area')
+            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
+            ax = _check_plot_works(d.plot, kind='area', stacked=False)
+            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
+
     @slow
     def test_bar_log(self):
         expected = np.array([1., 10., 100., 1000.])
@@ -500,7 +528,7 @@ def test_subplots(self):
         df = DataFrame(np.random.rand(10, 3),
                        index=list(string.ascii_letters[:10]))
 
-        for kind in ['bar', 'barh', 'line']:
+        for kind in ['bar', 'barh', 'line', 'area']:
             axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True)
 
             for ax, column in zip(axes, df.columns):
@@ -529,6 +557,104 @@ def test_subplots(self):
             for ax in axes:
                 self.assertTrue(ax.get_legend() is None)
 
+    def test_negative_log(self):
+        df = - DataFrame(rand(6, 4),
+                       index=list(string.ascii_letters[:6]),
+                       columns=['x', 'y', 'z', 'four'])
+
+        with tm.assertRaises(ValueError):
+            df.plot(kind='area', logy=True)
+        with tm.assertRaises(ValueError):
+            df.plot(kind='area', loglog=True)
+
+    def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
+        base = np.zeros(len(normal_lines[0].get_data()[1]))
+        for nl, sl in zip(normal_lines, stacked_lines):
+            base += nl.get_data()[1] # get y coodinates
+            sy = sl.get_data()[1]
+            self.assert_numpy_array_equal(base, sy)
+
+    def test_line_area_stacked(self):
+        with tm.RNGContext(42):
+            df = DataFrame(rand(6, 4),
+                           columns=['w', 'x', 'y', 'z'])
+            neg_df = - df
+            # each column has either positive or negative value
+            sep_df = DataFrame({'w': rand(6), 'x': rand(6),
+                                'y': - rand(6), 'z': - rand(6)})
+            # each column has positive-negative mixed value
+            mixed_df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]),
+                                 columns=['w', 'x', 'y', 'z'])
+
+            for kind in ['line', 'area']:
+                ax1 = _check_plot_works(df.plot, kind=kind, stacked=False)
+                ax2 = _check_plot_works(df.plot, kind=kind, stacked=True)
+                self._compare_stacked_y_cood(ax1.lines, ax2.lines)
+
+                ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False)
+                ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True)
+                self._compare_stacked_y_cood(ax1.lines, ax2.lines)
+
+                ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False)
+                ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True)
+                self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2])
+                self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:])
+
+                _check_plot_works(mixed_df.plot, stacked=False)
+                with tm.assertRaises(ValueError):
+                    mixed_df.plot(stacked=True)
+
+                _check_plot_works(df.plot, kind=kind, logx=True, stacked=True)
+
+    def test_line_area_nan_df(self):
+        values1 = [1, 2, np.nan, 3]
+        values2 = [3, np.nan, 2, 1]
+        df = DataFrame({'a': values1, 'b': values2})
+        tdf = DataFrame({'a': values1, 'b': values2}, index=tm.makeDateIndex(k=4))
+
+        for d in [df, tdf]:
+            ax = _check_plot_works(d.plot)
+            masked1 = ax.lines[0].get_ydata()
+            masked2 = ax.lines[1].get_ydata()
+            # remove nan for comparison purpose
+            self.assert_numpy_array_equal(np.delete(masked1.data, 2), np.array([1, 2, 3]))
+            self.assert_numpy_array_equal(np.delete(masked2.data, 1), np.array([3, 2, 1]))
+            self.assert_numpy_array_equal(masked1.mask, np.array([False, False, True, False]))
+            self.assert_numpy_array_equal(masked2.mask, np.array([False, True, False, False]))
+
+            expected1 = np.array([1, 2, 0, 3])
+            expected2 = np.array([3, 0, 2, 1])
+
+            ax = _check_plot_works(d.plot, stacked=True)
+            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
+            self.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
+
+            ax = _check_plot_works(d.plot, kind='area')
+            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
+            self.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
+
+            ax = _check_plot_works(d.plot, kind='area', stacked=False)
+            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
+            self.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2)
+
+    def test_area_lim(self):
+        df = DataFrame(rand(6, 4),
+                       columns=['x', 'y', 'z', 'four'])
+
+        neg_df = - df
+        for stacked in [True, False]:
+            ax = _check_plot_works(df.plot, kind='area', stacked=stacked)
+            xmin, xmax = ax.get_xlim()
+            ymin, ymax = ax.get_ylim()
+            lines = ax.get_lines()
+            self.assertEqual(xmin, lines[0].get_data()[0][0])        
+            self.assertEqual(xmax, lines[0].get_data()[0][-1]) 
+            self.assertEqual(ymin, 0) 
+
+            ax = _check_plot_works(neg_df.plot, kind='area', stacked=stacked)
+            ymin, ymax = ax.get_ylim()
+            self.assertEqual(ymax, 0) 
+
     @slow
     def test_bar_colors(self):
         import matplotlib.pyplot as plt
@@ -1077,11 +1203,11 @@ def _check_legend_labels(self, ax, labels):
 
     @slow
     def test_df_legend_labels(self):
-        kinds = 'line', 'bar', 'barh', 'kde', 'density'
-        df = DataFrame(randn(3, 3), columns=['a', 'b', 'c'])
-        df2 = DataFrame(randn(3, 3), columns=['d', 'e', 'f'])
-        df3 = DataFrame(randn(3, 3), columns=['g', 'h', 'i'])
-        df4 = DataFrame(randn(3, 3), columns=['j', 'k', 'l'])
+        kinds = 'line', 'bar', 'barh', 'kde', 'density', 'area'
+        df = DataFrame(rand(3, 3), columns=['a', 'b', 'c'])
+        df2 = DataFrame(rand(3, 3), columns=['d', 'e', 'f'])
+        df3 = DataFrame(rand(3, 3), columns=['g', 'h', 'i'])
+        df4 = DataFrame(rand(3, 3), columns=['j', 'k', 'l'])
 
         for kind in kinds:
             ax = df.plot(kind=kind, legend=True)
@@ -1170,31 +1296,41 @@ def test_style_by_column(self):
             for i, l in enumerate(ax.get_lines()[:len(markers)]):
                 self.assertEqual(l.get_marker(), markers[i])
 
+    def check_line_colors(self, colors, lines):
+        for i, l in enumerate(lines):
+            xp = colors[i]
+            rs = l.get_color()
+            self.assertEqual(xp, rs)
+
+    def check_collection_colors(self, colors, cols):
+        from matplotlib.colors import ColorConverter
+        conv = ColorConverter()
+        for i, c in enumerate(cols):
+            xp = colors[i]
+            xp = conv.to_rgba(xp)
+            rs = c.get_facecolor()[0]
+            for x, y in zip(xp, rs):
+                self.assertEqual(x, y)
+
     @slow
     def test_line_colors(self):
         import matplotlib.pyplot as plt
         import sys
         from matplotlib import cm
 
         custom_colors = 'rgcby'
-
         df = DataFrame(randn(5, 5))
 
         ax = df.plot(color=custom_colors)
-
-        lines = ax.get_lines()
-        for i, l in enumerate(lines):
-            xp = custom_colors[i]
-            rs = l.get_color()
-            self.assertEqual(xp, rs)
+        self.check_line_colors(custom_colors, ax.get_lines())
 
         tmp = sys.stderr
         sys.stderr = StringIO()
         try:
             tm.close()
             ax2 = df.plot(colors=custom_colors)
             lines2 = ax2.get_lines()
-            for l1, l2 in zip(lines, lines2):
+            for l1, l2 in zip(ax.get_lines(), lines2):
                 self.assertEqual(l1.get_color(), l2.get_color())
         finally:
             sys.stderr = tmp
@@ -1204,30 +1340,45 @@ def test_line_colors(self):
         ax = df.plot(colormap='jet')
 
         rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
-
-        lines = ax.get_lines()
-        for i, l in enumerate(lines):
-            xp = rgba_colors[i]
-            rs = l.get_color()
-            self.assertEqual(xp, rs)
+        self.check_line_colors(rgba_colors, ax.get_lines())
 
         tm.close()
 
         ax = df.plot(colormap=cm.jet)
 
         rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
-
-        lines = ax.get_lines()
-        for i, l in enumerate(lines):
-            xp = rgba_colors[i]
-            rs = l.get_color()
-            self.assertEqual(xp, rs)
+        self.check_line_colors(rgba_colors, ax.get_lines())
 
         # make color a list if plotting one column frame
         # handles cases like df.plot(color='DodgerBlue')
         tm.close()
         df.ix[:, [0]].plot(color='DodgerBlue')
 
+    @slow
+    def test_area_colors(self):
+        from matplotlib import cm
+        from matplotlib.collections import PolyCollection
+
+        custom_colors = 'rgcby'
+        df = DataFrame(rand(5, 5))
+
+        ax = df.plot(kind='area', color=custom_colors)
+        self.check_line_colors(custom_colors, ax.get_lines())
+        poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
+        self.check_collection_colors(custom_colors, poly)
+
+        ax = df.plot(kind='area', colormap='jet')
+        rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
+        self.check_line_colors(rgba_colors, ax.get_lines())
+        poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
+        self.check_collection_colors(rgba_colors, poly)
+
+        ax = df.plot(kind='area', colormap=cm.jet)
+        rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
+        self.check_line_colors(rgba_colors, ax.get_lines())
+        poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
+        self.check_collection_colors(rgba_colors, poly)
+
     def test_default_color_cycle(self):
         import matplotlib.pyplot as plt
         plt.rcParams['axes.color_cycle'] = list('rgbk')
@@ -1268,6 +1419,15 @@ def test_partially_invalid_plot_data(self):
                 with tm.assertRaises(TypeError):
                     df.plot(kind=kind)
 
+        with tm.RNGContext(42):
+            # area plot doesn't support positive/negative mixed data
+            kinds = ['area']
+            df = DataFrame(rand(10, 2), dtype=object)
+            df[np.random.rand(df.shape[0]) > 0.5] = 'a'
+            for kind in kinds:
+                with tm.assertRaises(TypeError):
+                    df.plot(kind=kind)
+
     def test_invalid_kind(self):
         df = DataFrame(randn(10, 2))
         with tm.assertRaises(ValueError):
@@ -1671,6 +1831,7 @@ def _check_plot_works(f, *args, **kwargs):
             plt.savefig(path)
     finally:
         tm.close(fig)
+    return ret
 
 
 def curpath():