@Appender(_merge_doc, indents=0)
def merge(left, right, how='inner', on=None, left_on=None, right_on=None,
          left_index=False, right_index=False, sort=False,
          suffixes=('_x', '_y'), copy=True, indicator=False):
    # Functional front end: every argument is handed to _MergeOperation
    # unchanged and the merged object it builds is returned.
    #
    # `indicator` defaults to False so existing callers are unaffected; when
    # truthy, _MergeOperation.get_result runs its indicator pre-/post-merge
    # steps around the join.
    #
    # No literal docstring here on purpose: the documentation text comes from
    # `_merge_doc` (via the decorator and the assignment below).
    op = _MergeOperation(left, right, how=how, on=on, left_on=left_on,
                         right_on=right_on, left_index=left_index,
                         right_index=right_index, sort=sort,
                         suffixes=suffixes, copy=copy, indicator=indicator)
    return op.get_result()
if __debug__:
    # Overwrite merge.__doc__ with the shared template, filling its single
    # placeholder with the `left` parameter entry. The assignment is skipped
    # when the interpreter runs with -O (``__debug__`` is False).
    merge.__doc__ = _merge_doc % '\n left : DataFrame'
@@ -160,7 +160,7 @@ class _MergeOperation(object):
160160 def __init__ (self , left , right , how = 'inner' , on = None ,
161161 left_on = None , right_on = None , axis = 1 ,
162162 left_index = False , right_index = False , sort = True ,
163- suffixes = ('_x' , '_y' ), copy = True ):
163+ suffixes = ('_x' , '_y' ), copy = True , indicator = False ):
164164 self .left = self .orig_left = left
165165 self .right = self .orig_right = right
166166 self .how = how
@@ -177,12 +177,18 @@ def __init__(self, left, right, how='inner', on=None,
177177 self .left_index = left_index
178178 self .right_index = right_index
179179
180+ self .indicator = indicator
181+
180182 # note this function has side effects
181183 (self .left_join_keys ,
182184 self .right_join_keys ,
183185 self .join_names ) = self ._get_merge_keys ()
184186
185187 def get_result (self ):
188+ if self .indicator :
189+ # Has side-effects. Most cleaned up in `self._indicator_post_merge`
190+ self ._indicator_pre_merge (self .left , self .right )
191+
186192 join_index , left_indexer , right_indexer = self ._get_join_info ()
187193
188194 ldata , rdata = self .left ._data , self .right ._data
@@ -202,10 +208,41 @@ def get_result(self):
202208 typ = self .left ._constructor
203209 result = typ (result_data ).__finalize__ (self , method = 'merge' )
204210
211+ if self .indicator :
212+ # Has side-effects
213+ self ._indicator_post_merge (result , self .left , self .right )
214+
205215 self ._maybe_add_join_keys (result , left_indexer , right_indexer )
206216
207217 return result
208218
219+ def _indicator_pre_merge (self , left , right ):
220+
221+ columns = left .columns .values .tolist () + right .columns .values .tolist ()
222+
223+ for i in ['_left_indicator' , '_right_indicator' , '_merge' ]:
224+ if i in columns :
225+ raise ValueError ("Cannot use `indicator=True` option when data contains a column named {}" .format (i ))
226+
227+ left ['_left_indicator' ] = 1
228+ left ['_left_indicator' ] = left ['_left_indicator' ].astype ('int8' )
229+
230+ right ['_right_indicator' ] = 2
231+ right ['_right_indicator' ] = right ['_right_indicator' ].astype ('int8' )
232+
233+
234+ def _indicator_post_merge (self , result , left , right ):
235+ result ['_left_indicator' ].fillna (0 , inplace = True )
236+ result ['_right_indicator' ].fillna (0 , inplace = True )
237+
238+ result ['_merge' ] = Categorical ((result ['_left_indicator' ] + result ['_right_indicator' ]), categories = [1 ,2 ,3 ])
239+ result ['_merge' ].cat .rename_categories (['left_only' , 'right_only' , 'both' ], inplace = True )
240+
241+ # Cleanup
242+ result .drop (labels = ['_left_indicator' , '_right_indicator' ], axis = 1 , inplace = True )
243+ left .drop (labels = ['_left_indicator' ], axis = 1 , inplace = True )
244+ right .drop (labels = ['_right_indicator' ], axis = 1 , inplace = True )
245+
209246 def _maybe_add_join_keys (self , result , left_indexer , right_indexer ):
210247 # insert group keys
211248
0 commit comments