@@ -259,7 +259,8 @@ def _merger(x, y):
259259
260260def merge_asof (left , right , on = None ,
261261 left_on = None , right_on = None ,
262- by = None ,
262+ left_index = False , right_index = False ,
263+ by = None , left_by = None , right_by = None ,
263264 suffixes = ('_x' , '_y' ),
264265 tolerance = None ,
265266 allow_exact_matches = True ):
@@ -288,9 +289,29 @@ def merge_asof(left, right, on=None,
288289 Field name to join on in left DataFrame.
289290 right_on : label
290291 Field name to join on in right DataFrame.
292+ left_index : boolean
293+ Use the index of the left DataFrame as the join key.
294+
295+ .. versionadded:: 0.19.2
296+
297+ right_index : boolean
298+ Use the index of the right DataFrame as the join key.
299+
300+ .. versionadded:: 0.19.2
301+
291302 by : column name
292303 Group both the left and right DataFrames by the group column; perform
293304 the merge operation on these pieces and recombine.
305+ left_by : column name
306+ Field name to group by in the left DataFrame.
307+
308+ .. versionadded:: 0.19.2
309+
310+ right_by : column name
311+ Field name to group by in the right DataFrame.
312+
313+ .. versionadded:: 0.19.2
314+
294315 suffixes : 2-length sequence (tuple, list, ...)
295316 Suffix to apply to overlapping column names in the left and right
296317 side, respectively
@@ -348,6 +369,28 @@ def merge_asof(left, right, on=None,
348369 3 5 b 3.0
349370 6 10 c 7.0
350371
372+ We can use indexed DataFrames as well.
373+
374+ >>> left
375+ left_val
376+ 1 a
377+ 5 b
378+ 10 c
379+
380+ >>> right
381+ right_val
382+ 1 1
383+ 2 2
384+ 3 3
385+ 6 6
386+ 7 7
387+
388+ >>> pd.merge_asof(left, right, left_index=True, right_index=True)
389+ left_val right_val
390+ 1 a 1
391+ 5 b 3
392+ 10 c 7
393+
351394 Here is a real-world time-series example
352395
353396 >>> quotes
@@ -418,7 +461,9 @@ def merge_asof(left, right, on=None,
418461 """
419462 op = _AsOfMerge (left , right ,
420463 on = on , left_on = left_on , right_on = right_on ,
421- by = by , suffixes = suffixes ,
464+ left_index = left_index , right_index = right_index ,
465+ by = by , left_by = left_by , right_by = right_by ,
466+ suffixes = suffixes ,
422467 how = 'asof' , tolerance = tolerance ,
423468 allow_exact_matches = allow_exact_matches )
424469 return op .get_result ()
@@ -650,7 +695,7 @@ def _get_join_info(self):
650695 left_ax = self .left ._data .axes [self .axis ]
651696 right_ax = self .right ._data .axes [self .axis ]
652697
653- if self .left_index and self .right_index :
698+ if self .left_index and self .right_index and self . how != 'asof' :
654699 join_index , left_indexer , right_indexer = \
655700 left_ax .join (right_ax , how = self .how , return_indexers = True )
656701 elif self .right_index and self .how == 'left' :
@@ -731,6 +776,16 @@ def _get_merge_keys(self):
731776 is_rkey = lambda x : isinstance (
732777 x , (np .ndarray , ABCSeries )) and len (x ) == len (right )
733778
779+ # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
780+ # user could, for example, request 'left_index' and 'left_by'. In a
781+ # regular pd.merge(), users cannot specify both 'left_index' and
782+ # 'left_on'. (Instead, users have a MultiIndex.) That means
783+ # self.left_on in this function is always empty in a pd.merge(), but
784+ # a pd.merge_asof(left_index=True, left_by=...) will result in a
785+ # self.left_on array with a None in the middle of it. This requires
786+ # a work-around, marked by comments in the code below.
787+ # See _validate_specification() for where this happens.
788+
734789 # ugh, spaghetti re #733
735790 if _any (self .left_on ) and _any (self .right_on ):
736791 for lk , rk in zip (self .left_on , self .right_on ):
@@ -740,21 +795,35 @@ def _get_merge_keys(self):
740795 right_keys .append (rk )
741796 join_names .append (None ) # what to do?
742797 else :
743- right_keys .append (right [rk ]._values )
744- join_names .append (rk )
798+ if rk is not None :
799+ right_keys .append (right [rk ]._values )
800+ join_names .append (rk )
801+ else :
802+ # work-around for merge_asof(right_index=True)
803+ right_keys .append (right .index )
804+ join_names .append (right .index .name )
745805 else :
746806 if not is_rkey (rk ):
747- right_keys .append (right [rk ]._values )
748- if lk == rk :
807+ if rk is not None :
808+ right_keys .append (right [rk ]._values )
809+ else :
810+ # work-around for merge_asof(right_index=True)
811+ right_keys .append (right .index )
812+ if lk is not None and lk == rk :
749813 # avoid key upcast in corner case (length-0)
750814 if len (left ) > 0 :
751815 right_drop .append (rk )
752816 else :
753817 left_drop .append (lk )
754818 else :
755819 right_keys .append (rk )
756- left_keys .append (left [lk ]._values )
757- join_names .append (lk )
820+ if lk is not None :
821+ left_keys .append (left [lk ]._values )
822+ join_names .append (lk )
823+ else :
824+ # work-around for merge_asof(left_index=True)
825+ left_keys .append (left .index )
826+ join_names .append (left .index .name )
758827 elif _any (self .left_on ):
759828 for k in self .left_on :
760829 if is_lkey (k ):
@@ -879,13 +948,15 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner',
879948class _OrderedMerge (_MergeOperation ):
880949 _merge_type = 'ordered_merge'
881950
882- def __init__ (self , left , right , on = None , left_on = None ,
883- right_on = None , axis = 1 ,
951+ def __init__ (self , left , right , on = None , left_on = None , right_on = None ,
952+ left_index = False , right_index = False , axis = 1 ,
884953 suffixes = ('_x' , '_y' ), copy = True ,
885954 fill_method = None , how = 'outer' ):
886955
887956 self .fill_method = fill_method
888957 _MergeOperation .__init__ (self , left , right , on = on , left_on = left_on ,
958+ left_index = left_index ,
959+ right_index = right_index ,
889960 right_on = right_on , axis = axis ,
890961 how = how , suffixes = suffixes ,
891962 sort = True # factorize sorts
@@ -958,43 +1029,68 @@ def _get_cython_type(dtype):
9581029class _AsOfMerge (_OrderedMerge ):
9591030 _merge_type = 'asof_merge'
9601031
961- def __init__ (self , left , right , on = None , by = None , left_on = None ,
962- right_on = None , axis = 1 ,
963- suffixes = ('_x' , '_y' ), copy = True ,
1032+ def __init__ (self , left , right , on = None , left_on = None , right_on = None ,
1033+ left_index = False , right_index = False ,
1034+ by = None , left_by = None , right_by = None ,
1035+ axis = 1 , suffixes = ('_x' , '_y' ), copy = True ,
9641036 fill_method = None ,
9651037 how = 'asof' , tolerance = None ,
9661038 allow_exact_matches = True ):
9671039
9681040 self .by = by
1041+ self .left_by = left_by
1042+ self .right_by = right_by
9691043 self .tolerance = tolerance
9701044 self .allow_exact_matches = allow_exact_matches
9711045
9721046 _OrderedMerge .__init__ (self , left , right , on = on , left_on = left_on ,
973- right_on = right_on , axis = axis ,
1047+ right_on = right_on , left_index = left_index ,
1048+ right_index = right_index , axis = axis ,
9741049 how = how , suffixes = suffixes ,
9751050 fill_method = fill_method )
9761051
9771052 def _validate_specification (self ):
9781053 super (_AsOfMerge , self )._validate_specification ()
9791054
9801055 # we only allow a single 'on' key (or the index instead)
981- if len (self .left_on ) != 1 :
1056+ if len (self .left_on ) != 1 and not self . left_index :
9821057 raise MergeError ("can only asof on a key for left" )
9831058
984- if len (self .right_on ) != 1 :
1059+ if len (self .right_on ) != 1 and not self . right_index :
9851060 raise MergeError ("can only asof on a key for right" )
9861061
1062+ if self .left_index and isinstance (self .left .index , MultiIndex ):
1063+ raise MergeError ("left can only have one index" )
1064+
1065+ if self .right_index and isinstance (self .right .index , MultiIndex ):
1066+ raise MergeError ("right can only have one index" )
1067+
1068+ # set 'by' columns
1069+ if self .by is not None :
1070+ if self .left_by is not None or self .right_by is not None :
1071+ raise MergeError ('Can only pass by OR left_by '
1072+ 'and right_by' )
1073+ self .left_by = self .right_by = self .by
1074+ if self .left_by is None and self .right_by is not None :
1075+ raise MergeError ('missing left_by' )
1076+ if self .left_by is not None and self .right_by is None :
1077+ raise MergeError ('missing right_by' )
1078+
9871079 # add by to our key-list so we can have it in the
9881080 # output as a key
989- if self .by is not None :
990- if not is_list_like (self .by ):
991- self .by = [self .by ]
1081+ if self .left_by is not None :
1082+ if not is_list_like (self .left_by ):
1083+ self .left_by = [self .left_by ]
1084+ if not is_list_like (self .right_by ):
1085+ self .right_by = [self .right_by ]
9921086
993- if len (self .by ) != 1 :
1087+ if len (self .left_by ) != 1 :
1088+ raise MergeError ("can only asof by a single key" )
1089+ if len (self .right_by ) != 1 :
9941090 raise MergeError ("can only asof by a single key" )
9951091
996- self .left_on = self .by + list (self .left_on )
997- self .right_on = self .by + list (self .right_on )
1092+ self .left_on = self .left_by + list (self .left_on )
1093+ self .right_on = self .right_by + list (self .right_on )
9981094
9991095 @property
10001096 def _asof_key (self ):
@@ -1017,7 +1113,7 @@ def _get_merge_keys(self):
10171113 # validate tolerance; must be a Timedelta if we have a DTI
10181114 if self .tolerance is not None :
10191115
1020- lt = left_join_keys [self . left_on . index ( self . _asof_key ) ]
1116+ lt = left_join_keys [- 1 ]
10211117 msg = "incompatible tolerance, must be compat " \
10221118 "with type {0}" .format (type (lt ))
10231119
@@ -1047,8 +1143,10 @@ def _get_join_indexers(self):
10471143 """ return the join indexers """
10481144
10491145 # values to compare
1050- left_values = self .left_join_keys [- 1 ]
1051- right_values = self .right_join_keys [- 1 ]
1146+ left_values = (self .left .index .values if self .left_index else
1147+ self .left_join_keys [- 1 ])
1148+ right_values = (self .right .index .values if self .right_index else
1149+ self .right_join_keys [- 1 ])
10521150 tolerance = self .tolerance
10531151
10541152 # we required sortedness in the join keys
@@ -1066,7 +1164,7 @@ def _get_join_indexers(self):
10661164 tolerance = tolerance .value
10671165
10681166 # a "by" parameter requires special handling
1069- if self .by is not None :
1167+ if self .left_by is not None :
10701168 left_by_values = self .left_join_keys [0 ]
10711169 right_by_values = self .right_join_keys [0 ]
10721170
0 commit comments