diff --git a/pandas/core/ops.py b/pandas/core/ops.py index e0aa0a4a415e1..3db2dd849ccee 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -41,6 +41,297 @@ ABCIndex, ABCPeriodIndex) + +def _gen_eval_kwargs(name): + """ + Find the keyword arguments to pass to numexpr for the given operation. + + Parameters + ---------- + name : str + + Returns + ------- + eval_kwargs : dict + + Examples + -------- + >>> _gen_eval_kwargs("__add__") + {} + + >>> _gen_eval_kwargs("rtruediv") + {'reversed': True, 'truediv': True} + """ + kwargs = {} + + # Series and Panel appear to only pass __add__, __radd__, ... + # but DataFrame gets both these dunder names _and_ non-dunder names + # add, radd, ... + name = name.replace('__', '') + + if name.startswith('r'): + if name not in ['radd', 'rand', 'ror', 'rxor']: + # Exclude commutative operations + kwargs['reversed'] = True + + if name in ['truediv', 'rtruediv']: + kwargs['truediv'] = True + + if name in ['ne']: + kwargs['masker'] = True + + return kwargs + + +def _gen_fill_zeros(name): + """ + Find the appropriate fill value to use when filling in undefined values + in the results of the given operation caused by operating on + (generally dividing by) zero. 
+ + Parameters + ---------- + name : str + + Returns + ------- + fill_value : {None, np.nan, np.inf} + """ + name = name.strip('__') + if 'div' in name: + # truediv, floordiv, div, and reversed variants + fill_value = np.inf + elif 'mod' in name: + # mod, rmod + fill_value = np.nan + else: + fill_value = None + return fill_value + + +# ----------------------------------------------------------------------------- +# Docstring Generation and Templates + +_op_descriptions = { + 'add': {'op': '+', + 'desc': 'Addition', + 'reversed': False, + 'reverse': 'radd'}, + 'sub': {'op': '-', + 'desc': 'Subtraction', + 'reversed': False, + 'reverse': 'rsub'}, + 'mul': {'op': '*', + 'desc': 'Multiplication', + 'reversed': False, + 'reverse': 'rmul'}, + 'mod': {'op': '%', + 'desc': 'Modulo', + 'reversed': False, + 'reverse': 'rmod'}, + 'pow': {'op': '**', + 'desc': 'Exponential power', + 'reversed': False, + 'reverse': 'rpow'}, + 'truediv': {'op': '/', + 'desc': 'Floating division', + 'reversed': False, + 'reverse': 'rtruediv'}, + 'floordiv': {'op': '//', + 'desc': 'Integer division', + 'reversed': False, + 'reverse': 'rfloordiv'}, + 'divmod': {'op': 'divmod', + 'desc': 'Integer division and modulo', + 'reversed': False, + 'reverse': None}, + + 'eq': {'op': '==', + 'desc': 'Equal to', + 'reversed': False, + 'reverse': None}, + 'ne': {'op': '!=', + 'desc': 'Not equal to', + 'reversed': False, + 'reverse': None}, + 'lt': {'op': '<', + 'desc': 'Less than', + 'reversed': False, + 'reverse': None}, + 'le': {'op': '<=', + 'desc': 'Less than or equal to', + 'reversed': False, + 'reverse': None}, + 'gt': {'op': '>', + 'desc': 'Greater than', + 'reversed': False, + 'reverse': None}, + 'ge': {'op': '>=', + 'desc': 'Greater than or equal to', + 'reversed': False, + 'reverse': None}} + +_op_names = list(_op_descriptions.keys()) +for key in _op_names: + reverse_op = _op_descriptions[key]['reverse'] + if reverse_op is not None: + _op_descriptions[reverse_op] = _op_descriptions[key].copy() + 
_op_descriptions[reverse_op]['reversed'] = True + _op_descriptions[reverse_op]['reverse'] = key + +_flex_doc_SERIES = """ +{desc} of series and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other : Series or scalar value +fill_value : None or float value, default None (NaN) + Fill missing (NaN) values with this value. If both Series are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Returns +------- +result : Series + +See also +-------- +Series.{reverse} +""" + +_arith_doc_FRAME = """ +Binary operator %s with support to substitute a fill_value for missing data in +one of the inputs + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {0, 1, 'index', 'columns'} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. If both DataFrame locations are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame +""" + +_flex_doc_FRAME = """ +{desc} of dataframe and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {{0, 1, 'index', 'columns'}} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. 
If both DataFrame + locations are missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame + +See also +-------- +DataFrame.{reverse} +""" + +_flex_doc_PANEL = """ +{desc} of series and other, element-wise (binary operator `{op_name}`). +Equivalent to ``{equiv}``. + +Parameters +---------- +other : DataFrame or Panel +axis : {{items, major_axis, minor_axis}} + Axis to broadcast over + +Returns +------- +Panel + +See also +-------- +Panel.{reverse} +""" + + +_agg_doc_PANEL = """ +Wrapper method for {wrp_method} + +Parameters +---------- +other : {construct} or {cls_name} +axis : {{{axis_order}}} + Axis to broadcast over + +Returns +------- +{cls_name} +""" + + +def _make_flex_doc(op_name, typ): + """ + Make the appropriate substitutions for the given operation and class-typ + into _flex_doc_SERIES, _flex_doc_FRAME or _flex_doc_PANEL to return the + docstring to attach to a generated method. + + Parameters + ---------- + op_name : str {'__add__', '__sub__', ... 
'__eq__', '__ne__', ...} + typ : str {'series', 'dataframe', 'panel'} + + Returns + ------- + doc : str + """ + op_name = op_name.replace('__', '') + op_desc = _op_descriptions[op_name] + + if op_desc['reversed']: + equiv = 'other ' + op_desc['op'] + ' ' + typ + else: + equiv = typ + ' ' + op_desc['op'] + ' other' + + if typ == 'series': + base_doc = _flex_doc_SERIES + elif typ == 'dataframe': + base_doc = _flex_doc_FRAME + elif typ == 'panel': + base_doc = _flex_doc_PANEL + else: + raise AssertionError('Invalid typ argument.') + + doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, + equiv=equiv, reverse=op_desc['reverse']) + return doc + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -82,35 +373,31 @@ def names(x): mul=arith_method(operator.mul, names('mul'), op('*'), default_axis=default_axis), truediv=arith_method(operator.truediv, names('truediv'), op('/'), - truediv=True, fill_zeros=np.inf, default_axis=default_axis), floordiv=arith_method(operator.floordiv, names('floordiv'), op('//'), - default_axis=default_axis, fill_zeros=np.inf), + default_axis=default_axis), # Causes a floating point exception in the tests when numexpr enabled, # so for now no speedup mod=arith_method(operator.mod, names('mod'), None, - default_axis=default_axis, fill_zeros=np.nan), + default_axis=default_axis), pow=arith_method(operator.pow, names('pow'), op('**'), default_axis=default_axis), # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility rmul=arith_method(operator.mul, names('rmul'), op('*'), - default_axis=default_axis, reversed=True), + default_axis=default_axis), rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-'), - default_axis=default_axis, reversed=True), + default_axis=default_axis), rtruediv=arith_method(lambda x, y: operator.truediv(y, x), - names('rtruediv'), op('/'), 
truediv=True, - fill_zeros=np.inf, default_axis=default_axis, - reversed=True), + names('rtruediv'), op('/'), + default_axis=default_axis), rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), names('rfloordiv'), op('//'), - default_axis=default_axis, fill_zeros=np.inf, - reversed=True), + default_axis=default_axis), rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**'), - default_axis=default_axis, reversed=True), + default_axis=default_axis), rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'), - default_axis=default_axis, fill_zeros=np.nan, - reversed=True),) + default_axis=default_axis)) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] @@ -119,11 +406,11 @@ def names(x): if comp_method: new_methods.update(dict( eq=comp_method(operator.eq, names('eq'), op('==')), - ne=comp_method(operator.ne, names('ne'), op('!='), masker=True), + ne=comp_method(operator.ne, names('ne'), op('!=')), lt=comp_method(operator.lt, names('lt'), op('<')), gt=comp_method(operator.gt, names('gt'), op('>')), le=comp_method(operator.le, names('le'), op('<=')), - ge=comp_method(operator.ge, names('ge'), op('>=')), )) + ge=comp_method(operator.ge, names('ge'), op('>=')))) if bool_method: new_methods.update( dict(and_=bool_method(operator.and_, names('and_'), op('&')), @@ -138,13 +425,10 @@ def names(x): names('rxor'), op('^')))) if have_divmod: # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method( - divmod, - names('divmod'), - None, - default_axis=default_axis, - construct_result=_construct_divmod_result, - ) + new_methods['divmod'] = arith_method(divmod, + names('divmod'), + None, + default_axis=default_axis) new_methods = {names(k): v for k, v in new_methods.items()} return new_methods @@ -170,7 +454,7 @@ def add_special_arithmetic_methods(cls, arith_method=None, ---------- arith_method : function (optional) factory for special arithmetic methods, with op string: - 
f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + f(op, name, str_rep, default_axis=None) comp_method : function (optional) factory for rich comparison - signature: f(op, name, str_rep) bool_method : function (optional) @@ -242,7 +526,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, ---------- flex_arith_method : function factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + f(op, name, str_rep, default_axis=None) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) use_numexpr : bool, default True @@ -267,6 +551,9 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, add_methods(cls, new_methods=new_methods, force=force) +# ----------------------------------------------------------------------------- +# Series + def _align_method_SERIES(left, right, align_asobject=False): """ align lhs and rhs Series """ @@ -310,12 +597,16 @@ def _construct_divmod_result(left, result, index, name, dtype): ) -def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None, - construct_result=_construct_result, **eval_kwargs): +def _arith_method_SERIES(op, name, str_rep, default_axis=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + construct_result = (_construct_divmod_result + if op is divmod else _construct_result) + def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -448,11 +739,12 @@ def _comp_method_OBJECT_ARRAY(op, x, y): return result -def _comp_method_SERIES(op, name, str_rep, masker=False): +def _comp_method_SERIES(op, name, str_rep): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" + masker = _gen_eval_kwargs(name).get('masker', False) def na_op(x, y): @@ -641,109 +933,8 @@ def wrapper(self, other): return wrapper -_op_descriptions = {'add': {'op': '+', - 'desc': 'Addition', - 'reversed': False, - 'reverse': 'radd'}, - 'sub': {'op': '-', - 'desc': 'Subtraction', - 'reversed': False, - 'reverse': 'rsub'}, - 'mul': {'op': '*', - 'desc': 'Multiplication', - 'reversed': False, - 'reverse': 'rmul'}, - 'mod': {'op': '%', - 'desc': 'Modulo', - 'reversed': False, - 'reverse': 'rmod'}, - 'pow': {'op': '**', - 'desc': 'Exponential power', - 'reversed': False, - 'reverse': 'rpow'}, - 'truediv': {'op': '/', - 'desc': 'Floating division', - 'reversed': False, - 'reverse': 'rtruediv'}, - 'floordiv': {'op': '//', - 'desc': 'Integer division', - 'reversed': False, - 'reverse': 'rfloordiv'}, - 'divmod': {'op': 'divmod', - 'desc': 'Integer division and modulo', - 'reversed': False, - 'reverse': None}, - - 'eq': {'op': '==', - 'desc': 'Equal to', - 'reversed': False, - 'reverse': None}, - 'ne': {'op': '!=', - 'desc': 'Not equal to', - 'reversed': False, - 'reverse': None}, - 'lt': {'op': '<', - 'desc': 'Less than', - 'reversed': False, - 'reverse': None}, - 'le': {'op': '<=', - 'desc': 'Less than or equal to', - 'reversed': False, - 'reverse': None}, - 'gt': {'op': '>', - 'desc': 'Greater than', - 'reversed': False, - 'reverse': None}, - 'ge': {'op': '>=', - 'desc': 'Greater than or equal to', - 'reversed': False, - 'reverse': None}} - -_op_names = list(_op_descriptions.keys()) -for k in _op_names: - reverse_op = _op_descriptions[k]['reverse'] - _op_descriptions[reverse_op] = _op_descriptions[k].copy() - _op_descriptions[reverse_op]['reversed'] = True - _op_descriptions[reverse_op]['reverse'] = k - - -_flex_doc_SERIES = """ -%s of series and other, element-wise (binary operator `%s`). - -Equivalent to ``%s``, but with support to substitute a fill_value for -missing data in one of the inputs. 
- -Parameters ----------- -other : Series or scalar value -fill_value : None or float value, default None (NaN) - Fill missing (NaN) values with this value. If both Series are - missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Returns -------- -result : Series - -See also --------- -Series.%s -""" - - -def _flex_method_SERIES(op, name, str_rep, default_axis=None, fill_zeros=None, - **eval_kwargs): - op_name = name.replace('__', '') - op_desc = _op_descriptions[op_name] - if op_desc['reversed']: - equiv = 'other ' + op_desc['op'] + ' series' - else: - equiv = 'series ' + op_desc['op'] + ' other' - - doc = _flex_doc_SERIES % (op_desc['desc'], op_name, equiv, - op_desc['reverse']) +def _flex_method_SERIES(op, name, str_rep, default_axis=None): + doc = _make_flex_doc(name, 'series') @Appender(doc) def flex_wrapper(self, other, level=None, fill_value=None, axis=0): @@ -776,62 +967,9 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): bool_method=_bool_method_SERIES, have_divmod=True) -_arith_doc_FRAME = """ -Binary operator %s with support to substitute a fill_value for missing data in -one of the inputs - -Parameters ----------- -other : Series, DataFrame, or constant -axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on -fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame locations are - missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Notes ------ -Mismatched indices will be unioned together - -Returns -------- -result : DataFrame -""" - -_flex_doc_FRAME = """ -%s of dataframe and other, element-wise (binary operator `%s`). - -Equivalent to ``%s``, but with support to substitute a fill_value for -missing data in one of the inputs. 
- -Parameters ----------- -other : Series, DataFrame, or constant -axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on -fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame - locations are missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Notes ------ -Mismatched indices will be unioned together - -Returns -------- -result : DataFrame - -See also --------- -DataFrame.%s -""" +# ----------------------------------------------------------------------------- +# DataFrame def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ @@ -877,8 +1015,10 @@ def to_series(right): return right -def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', - fill_zeros=None, **eval_kwargs): +def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns'): + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -923,15 +1063,8 @@ def na_op(x, y): return result if name in _op_descriptions: - op_name = name.replace('__', '') - op_desc = _op_descriptions[op_name] - if op_desc['reversed']: - equiv = 'other ' + op_desc['op'] + ' dataframe' - else: - equiv = 'dataframe ' + op_desc['op'] + ' other' - - doc = _flex_doc_FRAME % (op_desc['desc'], op_name, equiv, - op_desc['reverse']) + # i.e. 
include "add" but not "__add__" + doc = _make_flex_doc(name, 'dataframe') else: doc = _arith_doc_FRAME % name @@ -955,9 +1088,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -# Masker unused for now -def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns', - masker=False): +def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns'): + def na_op(x, y): try: with np.errstate(invalid='ignore'): @@ -1003,7 +1135,7 @@ def f(self, other, axis=default_axis, level=None): return f -def _comp_method_FRAME(func, name, str_rep, masker=False): +def _comp_method_FRAME(func, name, str_rep): @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): if isinstance(other, ABCDataFrame): # Another DataFrame @@ -1032,8 +1164,10 @@ def f(self, other): bool_method=_arith_method_FRAME) -def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None, - default_axis=None, **eval_kwargs): +# ----------------------------------------------------------------------------- +# Panel + +def _arith_method_PANEL(op, name, str_rep=None, default_axis=None): # work only for scalars def f(self, other): @@ -1048,7 +1182,7 @@ def f(self, other): return f -def _comp_method_PANEL(op, name, str_rep=None, masker=False): +def _comp_method_PANEL(op, name, str_rep=None): def na_op(x, y): import pandas.core.computation.expressions as expressions diff --git a/pandas/core/panel.py b/pandas/core/panel.py index ae86074ce2d05..afdd9bae3006f 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1525,8 +1525,11 @@ def _extract_axis(self, data, axis=0, intersect=False): def _add_aggregate_operations(cls, use_numexpr=True): """ add the operations to the cls; evaluate the doc strings again """ - def _panel_arith_method(op, name, str_rep=None, default_axis=None, - fill_zeros=None, **eval_kwargs): + def _panel_arith_method(op, name, str_rep=None, default_axis=None): + + eval_kwargs = ops._gen_eval_kwargs(name) + 
fill_zeros = ops._gen_fill_zeros(name) + def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -1544,50 +1547,10 @@ def na_op(x, y): return result if name in ops._op_descriptions: - op_name = name.replace('__', '') - op_desc = ops._op_descriptions[op_name] - if op_desc['reversed']: - equiv = 'other ' + op_desc['op'] + ' panel' - else: - equiv = 'panel ' + op_desc['op'] + ' other' - - _op_doc = """ -{desc} of series and other, element-wise (binary operator `{op_name}`). -Equivalent to ``{equiv}``. - -Parameters ----------- -other : {construct} or {cls_name} -axis : {{{axis_order}}} - Axis to broadcast over - -Returns -------- -{cls_name} - -See also --------- -{cls_name}.{reverse}\n""" - doc = _op_doc.format( - desc=op_desc['desc'], op_name=op_name, equiv=equiv, - construct=cls._constructor_sliced.__name__, - cls_name=cls.__name__, reverse=op_desc['reverse'], - axis_order=', '.join(cls._AXIS_ORDERS)) + doc = ops._make_flex_doc(name, 'panel') else: # doc strings substitors - _agg_doc = """ - Wrapper method for {wrp_method} - - Parameters - ---------- - other : {construct} or {cls_name} - axis : {{{axis_order}}} - Axis to broadcast over - - Returns - ------- - {cls_name}\n""" - doc = _agg_doc.format( + doc = ops._agg_doc_PANEL.format( construct=cls._constructor_sliced.__name__, cls_name=cls.__name__, wrp_method=name, axis_order=', '.join(cls._AXIS_ORDERS)) diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 9b2650359bf68..059e399593971 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -43,8 +43,7 @@ _sparray_doc_kwargs = dict(klass='SparseArray') -def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, - **eval_kwargs): +def _arith_method_SPARSE_ARRAY(op, name, str_rep=None, default_axis=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
@@ -864,7 +863,8 @@ def _make_index(length, indices, kind): return index -ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, - comp_method=_arith_method, - bool_method=_arith_method, +ops.add_special_arithmetic_methods(SparseArray, + arith_method=_arith_method_SPARSE_ARRAY, + comp_method=_arith_method_SPARSE_ARRAY, + bool_method=_arith_method_SPARSE_ARRAY, use_numexpr=False) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 4b649927f8f72..3506284161660 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -41,13 +41,12 @@ # Wrapper function for Series arithmetic methods -def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, - **eval_kwargs): +def _arith_method_SPARSE_SERIES(op, name, str_rep=None, default_axis=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. - str_rep, default_axis, fill_zeros and eval_kwargs are not used, but are + str_rep and default_axis are not used, but are present for compatibility. """ @@ -864,7 +863,8 @@ def from_coo(cls, A, dense_index=False): **ops.series_flex_funcs) # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. -ops.add_special_arithmetic_methods(SparseSeries, _arith_method, - comp_method=_arith_method, +ops.add_special_arithmetic_methods(SparseSeries, + arith_method=_arith_method_SPARSE_SERIES, + comp_method=_arith_method_SPARSE_SERIES, bool_method=None, use_numexpr=False, force=True)