From 267850b548d528f4efd8de44729aa9b96c1d0f59 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 1 Nov 2019 19:50:52 -0700
Subject: [PATCH 1/3] add types

---
 pandas/core/algorithms.py  |  2 +-
 pandas/core/dtypes/cast.py |  6 +++---
 pandas/core/groupby/ops.py |  2 +-
 pandas/core/sorting.py     | 24 +++++++++++++++---------
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c70e623778315..5cfade7402a7d 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1089,7 +1089,7 @@ def nsmallest(self):
         return self.compute("nsmallest")

     @staticmethod
-    def is_valid_dtype_n_method(dtype):
+    def is_valid_dtype_n_method(dtype) -> bool:
         """
         Helper function to determine if dtype is valid for
         nsmallest/nlargest methods
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 3e92906be706c..c750a388689da 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -491,7 +491,7 @@ def _ensure_dtype_type(value, dtype):
     return dtype.type(value)


-def infer_dtype_from(val, pandas_dtype=False):
+def infer_dtype_from(val, pandas_dtype: bool = False):
     """
     interpret the dtype from a scalar or array. This is a convenience
     routines to infer dtype from a scalar or an array
@@ -508,7 +508,7 @@
     return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)


-def infer_dtype_from_scalar(val, pandas_dtype=False):
+def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
     """
     interpret the dtype from a scalar

@@ -583,7 +583,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
     return dtype, val


-def infer_dtype_from_array(arr, pandas_dtype=False):
+def infer_dtype_from_array(arr, pandas_dtype: bool = False):
     """
     infer the dtype from a scalar or array

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 2a7fd079679a4..19cb71cdc5528 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -790,7 +790,7 @@ def _get_axes(group):
     return group.axes


-def _is_indexed_like(obj, axes):
+def _is_indexed_like(obj, axes) -> bool:
     if isinstance(obj, Series):
         if len(axes) > 1:
             return False
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 706f6159bcafe..9b8a1a76e419c 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -20,7 +20,7 @@
 _INT64_MAX = np.iinfo(np.int64).max


-def get_group_index(labels, shape, sort, xnull):
+def get_group_index(labels, shape, sort: bool, xnull: bool):
     """
     For the particular label_list, gets the offsets into the hypothetical list
     representing the totally ordered cartesian product of all possible label
@@ -48,7 +48,7 @@
     labels are equal at all location.
""" - def _int64_cut_off(shape): + def _int64_cut_off(shape) -> int: acc = 1 for i, mul in enumerate(shape): acc *= int(mul) @@ -125,7 +125,7 @@ def get_compressed_ids(labels, sizes): return compress_group_index(ids, sort=True) -def is_int64_overflow_possible(shape): +def is_int64_overflow_possible(shape) -> bool: the_prod = 1 for x in shape: the_prod *= int(x) @@ -153,7 +153,7 @@ def decons_group_index(comp_labels, shape): return label_list[::-1] -def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): +def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool): """ reconstruct labels from observed group ids @@ -177,7 +177,7 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): return [i8copy(lab[i]) for lab in labels] -def indexer_from_factorized(labels, shape, compress=True): +def indexer_from_factorized(labels, shape, compress: bool = True): ids = get_group_index(labels, shape, sort=True, xnull=False) if not compress: @@ -235,7 +235,7 @@ def lexsort_indexer(keys, orders=None, na_position="last"): return indexer_from_factorized(labels, shape) -def nargsort(items, kind="quicksort", ascending=True, na_position="last"): +def nargsort(items, kind="quicksort", ascending: bool = True, na_position="last"): """ This is intended to be a drop-in replacement for np.argsort which handles NaNs. It adds ascending and na_position parameters. @@ -325,7 +325,7 @@ def get_indexer_dict(label_list, keys): # sorting levels...cleverly? -def get_group_index_sorter(group_index, ngroups): +def get_group_index_sorter(group_index, ngroups: int): """ algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where @@ -350,7 +350,7 @@ def get_group_index_sorter(group_index, ngroups): return group_index.argsort(kind="mergesort") -def compress_group_index(group_index, sort=True): +def compress_group_index(group_index, sort: bool = True): """ Group_index is offsets into cartesian product of all possible labels. This space can be huge, so this function compresses it, by computing offsets @@ -391,7 +391,13 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False, verify=True): +def safe_sort( + values, + labels=None, + na_sentinel: int = -1, + assume_unique: bool = False, + verify: bool = True, +): """ Sort ``values`` and reorder corresponding ``labels``. ``values`` should be unique if ``labels`` is not None. From 33c1a1ac2d7a99d88f946b0419b61ae3cff661d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Nov 2019 20:01:23 -0700 Subject: [PATCH 2/3] Add types --- pandas/core/reshape/tile.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index a902c63e20e7d..c65f751d4ed36 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -38,12 +38,12 @@ def cut( x, bins, - right=True, + right: bool = True, labels=None, - retbins=False, - precision=3, - include_lowest=False, - duplicates="raise", + retbins: bool = False, + precision: int = 3, + include_lowest: bool = False, + duplicates: str = "raise", ): """ Bin values into discrete intervals. @@ -275,7 +275,14 @@ def cut( ) -def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"): +def qcut( + x, + q, + labels=None, + retbins: bool = False, + precision: int = 3, + duplicates: str = "raise", +): """ Quantile-based discretization function. 
     equal-sized buckets based on rank or based on sample quantiles. For example
@@ -355,12 +362,12 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"):
 def _bins_to_cuts(
     x,
     bins,
-    right=True,
+    right: bool = True,
     labels=None,
-    precision=3,
-    include_lowest=False,
+    precision: int = 3,
+    include_lowest: bool = False,
     dtype=None,
-    duplicates="raise",
+    duplicates: str = "raise",
 ):

     if duplicates not in ["raise", "drop"]:
@@ -498,7 +505,9 @@ def _convert_bin_to_datelike_type(bins, dtype):
     return bins


-def _format_labels(bins, precision, right=True, include_lowest=False, dtype=None):
+def _format_labels(
+    bins, precision, right: bool = True, include_lowest: bool = False, dtype=None
+):
     """ based on the dtype, return our labels """

     closed = "right" if right else "left"
@@ -556,7 +565,9 @@ def _preprocess_for_cut(x):
     return x_is_series, series_index, name, x


-def _postprocess_for_cut(fac, bins, retbins, x_is_series, series_index, name, dtype):
+def _postprocess_for_cut(
+    fac, bins, retbins: bool, x_is_series, series_index, name, dtype
+):
     """
     handles post processing for the cut method where
     we combine the index information if the originally passed

From e7cd7d65e84f6b118ccab03f7ae5d532ba2a2823 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 2 Nov 2019 08:28:54 -0700
Subject: [PATCH 3/3] use lambda instead of partial to make mypy happy

---
 pandas/core/reshape/tile.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index c65f751d4ed36..09db840ca4db0 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -1,8 +1,6 @@
 """
 Quantilization functions and related stuff
 """
-from functools import partial
-
 import numpy as np

 from pandas._libs import Timedelta, Timestamp
@@ -513,7 +511,7 @@ def _format_labels(
     closed = "right" if right else "left"

     if is_datetime64tz_dtype(dtype):
-        formatter = partial(Timestamp, tz=dtype.tz)
+        formatter = lambda x: Timestamp(x, tz=dtype.tz)
         adjust = lambda x: x - Timedelta("1ns")
     elif is_datetime64_dtype(dtype):
         formatter = Timestamp
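
The third commit's message says only that swapping functools.partial for a lambda keeps mypy happy; the runtime behaviour is unchanged. As a rough standalone sketch (not part of the patch; the names below and the "UTC" value are illustrative stand-ins, with tz playing the role of dtype.tz), the two forms it switches between are equivalent:

# Standalone sketch, not part of the patch: the two runtime-equivalent forms
# the third commit switches between. `tz` stands in for `dtype.tz`.
from functools import partial

from pandas import Timestamp

tz = "UTC"  # placeholder for dtype.tz

formatter_partial = partial(Timestamp, tz=tz)     # form the patch removes
formatter_lambda = lambda x: Timestamp(x, tz=tz)  # form the patch adopts

# Both build the same tz-aware Timestamp at runtime.
assert formatter_lambda("2019-11-02") == formatter_partial("2019-11-02")

A plausible reason, not stated in the patch, is that _format_labels also assigns plain Timestamp to the same formatter variable in the is_datetime64_dtype branch, and mypy reconciles that assignment with an inferred lambda (callable) type more readily than with a functools.partial object.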