From 4089cacbbd5f7aa9b41c9ade0298d7249d3f3f89 Mon Sep 17 00:00:00 2001 From: TomAugspurger Date: Thu, 4 Sep 2014 13:30:11 -0500 Subject: [PATCH 1/2] BUG: barplot with NaNs --- doc/source/v0.15.0.txt | 1 + doc/source/visualization.rst | 7 +++++++ pandas/tests/test_graphics.py | 17 +++++++++++++++++ pandas/tools/plotting.py | 6 ++++-- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index bfd484b363dd2..aa6d1dff2c547 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -670,3 +670,4 @@ Bug Fixes - Bug with kde plot and NaNs (:issue:`8182`) - Bug in ``GroupBy.count`` with float32 data type were nan values were not excluded (:issue:`8169`). +- Bug with stacked barplots and NaNs (:issue:`8175`). diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 1cce55cd53e11..26d5a8c9ff154 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -203,6 +203,13 @@ To get horizontal bar plots, pass ``kind='barh'``: @savefig barh_plot_stacked_ex.png df2.plot(kind='barh', stacked=True); +Pandas tries to be pragmatic about plotting DataFrames or Series +that contain missing data. When it makes sense missing values will +be filled with 0 (as is the case with bar plots). For other plots, +like line plots, filling in 0 usually doesn't make sense, and so pandas doesn't +try to guess. If you want to be explicit you can always call +:meth:`~pandas.DataFrame.fillna` before plotting. + .. 
_visualization.hist: Histograms diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 131edf499ff18..8f33da4521c8e 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -1479,6 +1479,23 @@ def test_bar_bottom_left(self): result = [p.get_x() for p in ax.patches] self.assertEqual(result, [1] * 5) + @slow + def test_bar_nan(self): + df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20], + 'C': [1, 2, 3]}) + ax = df.plot(kind='bar') + expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] + result = [p.get_height() for p in ax.patches] + self.assertEqual(result, expected) + + ax = df.plot(kind='bar', stacked=True) + result = [p.get_height() for p in ax.patches] + self.assertEqual(result, expected) + + result = [p.get_y() for p in ax.patches] + expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] + self.assertEqual(result, expected) + @slow def test_plot_scatter(self): df = DataFrame(randn(6, 4), diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 56316ac726c8a..089caf382120f 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -870,9 +870,11 @@ def _validate_color_args(self): " use one or the other or pass 'style' " "without a color symbol") - def _iter_data(self, data=None, keep_index=False): + def _iter_data(self, data=None, keep_index=False, fillna=None): if data is None: data = self.data + if fillna is not None: + data = data.fillna(fillna) from pandas.core.frame import DataFrame if isinstance(data, (Series, np.ndarray, Index)): @@ -1780,7 +1782,7 @@ def _make_plot(self): pos_prior = neg_prior = np.zeros(len(self.data)) K = self.nseries - for i, (label, y) in enumerate(self._iter_data()): + for i, (label, y) in enumerate(self._iter_data(fillna=0)): ax = self._get_ax(i) kwds = self.kwds.copy() kwds['color'] = colors[i % ncolors] From 9bf6b59f73e7a915636fb6251caf71a6bf5db7a5 Mon Sep 17 00:00:00 2001 From: TomAugspurger Date: Fri, 5 Sep 2014 09:36:24 -0500 Subject: [PATCH 2/2] 
DOC: Table about how plots handle missing data. --- doc/source/visualization.rst | 45 ++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 26d5a8c9ff154..8653581f5958d 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -203,13 +203,6 @@ To get horizontal bar plots, pass ``kind='barh'``: @savefig barh_plot_stacked_ex.png df2.plot(kind='barh', stacked=True); -Pandas tries to be pragmatic about plotting DataFrames or Series -that contain missing data. When it makes sense missing values will -be filled with 0 (as is the case with bar plots). For other plots, -like line plots, filling in 0 usually doesn't make sense, and so pandas doesn't -try to guess. If you want to be explicit you can always call -:meth:`~pandas.DataFrame.fillna` before plotting. - .. _visualization.hist: Histograms @@ -684,6 +677,44 @@ See the `matplotlib pie documentation