From 3f2fc320fb7fb03eb7bf42e9ce3c1cb3e44a1028 Mon Sep 17 00:00:00 2001
From: jreback
Date: Tue, 4 Mar 2014 13:32:23 -0500
Subject: [PATCH] BUG: Bug in setitem with a duplicate index and an alignable rhs (GH6541)

---
 doc/source/release.rst        |  1 +
 pandas/core/index.py          |  9 +++++++--
 pandas/core/indexing.py       |  4 +++-
 pandas/tests/test_indexing.py | 23 +++++++++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 29fd5bad16986..f5e2c80289c5c 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -199,6 +199,7 @@ Bug Fixes
 - Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`)
 - Regression from 0.13 in the treatmenet of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`)
 - ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`).
+- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`)
 
 pandas 0.13.1
 -------------
diff --git a/pandas/core/index.py b/pandas/core/index.py
index c16e2eff06904..f67270530c3f8 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -987,8 +987,13 @@ def intersection(self, other):
             except TypeError:
                 pass
 
-        indexer = self.get_indexer(other.values)
-        indexer = indexer.take((indexer != -1).nonzero()[0])
+        try:
+            indexer = self.get_indexer(other.values)
+            indexer = indexer.take((indexer != -1).nonzero()[0])
+        except:
+            # duplicates
+            indexer = self.get_indexer_non_unique(other.values)[0].unique()
+
         return self.take(indexer)
 
     def diff(self, other):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 6691db5f35bb4..288934dbd27f4 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -441,7 +441,9 @@ def can_do_equal_len():
                         # align to
                         if item in value:
                             v = value[item]
-                            v = v.reindex(self.obj[item].index & v.index)
+                            i = self.obj[item].index
+                            v = v.reindex(i & v.index)
+
                             setter(item, v.values)
                         else:
                             setter(item, np.nan)
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
index d373f2f43ad3e..f466ea302ee1c 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/test_indexing.py
@@ -564,6 +564,29 @@ def test_loc_setitem(self):
         expected = DataFrame({'a' : [0.5,-0.5,-1.5], 'b' : [0,1,2] })
         assert_frame_equal(df,expected)
 
+    def test_loc_setitem_dups(self):
+
+        # GH 6541
+        df_orig = DataFrame({'me' : list('rttti'),
+                             'foo': list('aaade'),
+                             'bar': np.arange(5,dtype='float64')*1.34+2,
+                             'bar2': np.arange(5,dtype='float64')*-.34+2}).set_index('me')
+
+        indexer = tuple(['r',['bar','bar2']])
+        df = df_orig.copy()
+        df.loc[indexer]*=2.0
+        assert_series_equal(df.loc[indexer],2.0*df_orig.loc[indexer])
+
+        indexer = tuple(['r','bar'])
+        df = df_orig.copy()
+        df.loc[indexer]*=2.0
+        self.assertEqual(df.loc[indexer],2.0*df_orig.loc[indexer])
+
+        indexer = tuple(['t',['bar','bar2']])
+        df = df_orig.copy()
+        df.loc[indexer]*=2.0
+        assert_frame_equal(df.loc[indexer],2.0*df_orig.loc[indexer])
+
     def test_chained_getitem_with_lists(self):
 
         # GH6394