diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 459bdbf10a4f1..8e336a78cc801 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -149,3 +149,4 @@ Bug Fixes - Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`) - Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`) +- Fixed the bug in ``DataFrame.align()`` which was giving wrong output when supplied with the ``join`` argument. Earlier, upon supplying the value of the ``join`` argument as any of the four options ('outer', 'inner', 'left', 'right'), ``align()`` was giving erroneous output. Align with ``broadcast_axis`` specified was using 'inner' join consistently, irrespective of the value of ``join`` provided, when aligning a DataFrame and a Series on the other axis. The problem was identified to be in pandas/core/generic.py and has been subsequently fixed. 
(:issue:`13194`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6c80ab9d87e33..ae3d71f148a82 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4147,13 +4147,17 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, from pandas import DataFrame, Series method = missing.clean_fill_method(method) + if axis is not None: + axis = self._get_axis_number(axis) + if broadcast_axis == 1 and self.ndim != other.ndim: if isinstance(self, Series): # this means other is a DataFrame, and we need to broadcast # self cons = self._constructor_expanddim df = cons(dict((c, self) for c in other.columns), - **other._construct_axes_dict()) + **self._construct_axes_dict( + **other._construct_axes_dict(axes=['columns']))) return df._align_frame(other, join=join, axis=axis, level=level, copy=copy, fill_value=fill_value, method=method, @@ -4163,14 +4167,13 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, # other cons = other._constructor_expanddim df = cons(dict((c, other) for c in self.columns), - **self._construct_axes_dict()) + **other._construct_axes_dict( + **self._construct_axes_dict(axes=['columns']))) return self._align_frame(df, join=join, axis=axis, level=level, copy=copy, fill_value=fill_value, method=method, limit=limit, fill_axis=fill_axis) - if axis is not None: - axis = self._get_axis_number(axis) if isinstance(other, DataFrame): return self._align_frame(other, join=join, axis=axis, level=level, copy=copy, fill_value=fill_value, diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 09dd0f3b14812..959eda4e1a591 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -880,3 +880,66 @@ def test_reindex_multi(self): expected = df.reindex([0, 1]).reindex(columns=['a', 'b']) assert_frame_equal(result, expected) + + def test_align_broadcast_axis(self): + # GH 13194 + # First four 
tests for DataFrame.align(Index) + # For 'right' join + df = DataFrame(np.array([[1., 2.], [3., 4.]]), columns=list('AB')) + ts = Series([5., 6., 7.]) + + result = df.align(ts, join='right', axis=0, broadcast_axis=1) + expected1 = DataFrame(np.array([[1., 2.], [3., 4.], + [pd.np.nan, pd.np.nan]]), + columns=list('AB')) + expected2 = DataFrame(np.array([[5., 5.], [6., 6.], [7., 7.]]), + columns=list('AB')) + assert_frame_equal(result[0], expected1) + assert_frame_equal(result[1], expected2) + + # For 'right' join on different index + result = df.align(ts, join='right', axis=1, broadcast_axis=1) + expected1 = DataFrame(np.array([[1., 2.], [3., 4.]]), + columns=list('AB')) + expected2 = DataFrame(np.array([[5., 5.], [6., 6.], + [7., 7.]]), + columns=list('AB')) + assert_frame_equal(result[0], expected1) + assert_frame_equal(result[1], expected2) + + # For 'left' join + result = df.align(ts, join='left', axis=0, broadcast_axis=1) + expected1 = DataFrame(np.array([[1., 2.], [3., 4.]]), + columns=list('AB')) + expected2 = DataFrame(np.array([[5., 5.], [6., 6.]]), + columns=list('AB')) + assert_frame_equal(result[0], expected1) + assert_frame_equal(result[1], expected2) + + # For 'left' join on different axis + result = df.align(ts, join='left', axis=1, broadcast_axis=1) + expected1 = DataFrame(np.array([[1., 2.], [3., 4.]]), + columns=list('AB')) + expected2 = DataFrame(np.array([[5., 5.], [6., 6.], [7., 7.]]), + columns=list('AB')) + assert_frame_equal(result[0], expected1) + assert_frame_equal(result[1], expected2) + + # Series.align(DataFrame) tests, 'outer' join + result = ts.align(df, join='outer', axis=0, broadcast_axis=1) + expected1 = DataFrame(np.array([[5., 5.], [6., 6.], [7., 7.]]), + columns=list('AB')) + expected2 = DataFrame(np.array([[1., 2.], [3., 4.], + [pd.np.nan, pd.np.nan]]), + columns=list('AB')) + assert_frame_equal(result[0], expected1) + assert_frame_equal(result[1], expected2) + + # Series.align(DataFrame) tests, 'inner' join + result = 
ts.align(df, join='inner', axis=0, broadcast_axis=1) + expected1 = DataFrame(np.array([[5., 5.], [6., 6.]]), + columns=list('AB')) + expected2 = DataFrame(np.array([[1., 2.], [3., 4.]]), + columns=list('AB')) + assert_frame_equal(result[0], expected1) + assert_frame_equal(result[1], expected2) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index ba282f0107d71..004107fe4f783 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1874,6 +1874,7 @@ def test_pipe_panel(self): with tm.assertRaises(ValueError): result = wp.pipe((f, 'y'), x=1, y=1) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)