From 267850b548d528f4efd8de44729aa9b96c1d0f59 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 1 Nov 2019 19:50:52 -0700
Subject: [PATCH 1/3] add types

---
 pandas/core/algorithms.py  |  2 +-
 pandas/core/dtypes/cast.py |  6 +++---
 pandas/core/groupby/ops.py |  2 +-
 pandas/core/sorting.py     | 24 +++++++++++++++---------
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c70e623778315..5cfade7402a7d 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1089,7 +1089,7 @@ def nsmallest(self):
         return self.compute("nsmallest")

     @staticmethod
-    def is_valid_dtype_n_method(dtype):
+    def is_valid_dtype_n_method(dtype) -> bool:
         """
         Helper function to determine if dtype is valid for
         nsmallest/nlargest methods
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 3e92906be706c..c750a388689da 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -491,7 +491,7 @@ def _ensure_dtype_type(value, dtype):
     return dtype.type(value)


-def infer_dtype_from(val, pandas_dtype=False):
+def infer_dtype_from(val, pandas_dtype: bool = False):
     """
     interpret the dtype from a scalar or array. This is a convenience
     routines to infer dtype from a scalar or an array
@@ -508,7 +508,7 @@
     return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)


-def infer_dtype_from_scalar(val, pandas_dtype=False):
+def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
     """
     interpret the dtype from a scalar

@@ -583,7 +583,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
     return dtype, val


-def infer_dtype_from_array(arr, pandas_dtype=False):
+def infer_dtype_from_array(arr, pandas_dtype: bool = False):
     """
     infer the dtype from a scalar or array

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 2a7fd079679a4..19cb71cdc5528 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -790,7 +790,7 @@ def _get_axes(group):
     return group.axes


-def _is_indexed_like(obj, axes):
+def _is_indexed_like(obj, axes) -> bool:
     if isinstance(obj, Series):
         if len(axes) > 1:
             return False
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 706f6159bcafe..9b8a1a76e419c 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -20,7 +20,7 @@
 _INT64_MAX = np.iinfo(np.int64).max


-def get_group_index(labels, shape, sort, xnull):
+def get_group_index(labels, shape, sort: bool, xnull: bool):
     """
     For the particular label_list, gets the offsets into the hypothetical list
     representing the totally ordered cartesian product of all possible label
@@ -48,7 +48,7 @@
     labels are equal at all location.
""" - def _int64_cut_off(shape): + def _int64_cut_off(shape) -> int: acc = 1 for i, mul in enumerate(shape): acc *= int(mul) @@ -125,7 +125,7 @@ def get_compressed_ids(labels, sizes): return compress_group_index(ids, sort=True) -def is_int64_overflow_possible(shape): +def is_int64_overflow_possible(shape) -> bool: the_prod = 1 for x in shape: the_prod *= int(x) @@ -153,7 +153,7 @@ def decons_group_index(comp_labels, shape): return label_list[::-1] -def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): +def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool): """ reconstruct labels from observed group ids @@ -177,7 +177,7 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): return [i8copy(lab[i]) for lab in labels] -def indexer_from_factorized(labels, shape, compress=True): +def indexer_from_factorized(labels, shape, compress: bool = True): ids = get_group_index(labels, shape, sort=True, xnull=False) if not compress: @@ -235,7 +235,7 @@ def lexsort_indexer(keys, orders=None, na_position="last"): return indexer_from_factorized(labels, shape) -def nargsort(items, kind="quicksort", ascending=True, na_position="last"): +def nargsort(items, kind="quicksort", ascending: bool = True, na_position="last"): """ This is intended to be a drop-in replacement for np.argsort which handles NaNs. It adds ascending and na_position parameters. @@ -325,7 +325,7 @@ def get_indexer_dict(label_list, keys): # sorting levels...cleverly? -def get_group_index_sorter(group_index, ngroups): +def get_group_index_sorter(group_index, ngroups: int): """ algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where @@ -350,7 +350,7 @@ def get_group_index_sorter(group_index, ngroups): return group_index.argsort(kind="mergesort") -def compress_group_index(group_index, sort=True): +def compress_group_index(group_index, sort: bool = True): """ Group_index is offsets into cartesian product of all possible labels. This space can be huge, so this function compresses it, by computing offsets @@ -391,7 +391,13 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False, verify=True): +def safe_sort( + values, + labels=None, + na_sentinel: int = -1, + assume_unique: bool = False, + verify: bool = True, +): """ Sort ``values`` and reorder corresponding ``labels``. ``values`` should be unique if ``labels`` is not None. From 33c1a1ac2d7a99d88f946b0419b61ae3cff661d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Nov 2019 20:01:23 -0700 Subject: [PATCH 2/3] Add types --- pandas/core/reshape/tile.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index a902c63e20e7d..c65f751d4ed36 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -38,12 +38,12 @@ def cut( x, bins, - right=True, + right: bool = True, labels=None, - retbins=False, - precision=3, - include_lowest=False, - duplicates="raise", + retbins: bool = False, + precision: int = 3, + include_lowest: bool = False, + duplicates: str = "raise", ): """ Bin values into discrete intervals. @@ -275,7 +275,14 @@ def cut( ) -def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"): +def qcut( + x, + q, + labels=None, + retbins: bool = False, + precision: int = 3, + duplicates: str = "raise", +): """ Quantile-based discretization function. 
     equal-sized buckets based on rank or based on sample quantiles. For example
@@ -355,12 +362,12 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"):
 def _bins_to_cuts(
     x,
     bins,
-    right=True,
+    right: bool = True,
     labels=None,
-    precision=3,
-    include_lowest=False,
+    precision: int = 3,
+    include_lowest: bool = False,
     dtype=None,
-    duplicates="raise",
+    duplicates: str = "raise",
 ):

     if duplicates not in ["raise", "drop"]:
@@ -498,7 +505,9 @@ def _convert_bin_to_datelike_type(bins, dtype):
     return bins


-def _format_labels(bins, precision, right=True, include_lowest=False, dtype=None):
+def _format_labels(
+    bins, precision, right: bool = True, include_lowest: bool = False, dtype=None
+):
     """ based on the dtype, return our labels """

     closed = "right" if right else "left"
@@ -556,7 +565,9 @@ def _preprocess_for_cut(x):
     return x_is_series, series_index, name, x


-def _postprocess_for_cut(fac, bins, retbins, x_is_series, series_index, name, dtype):
+def _postprocess_for_cut(
+    fac, bins, retbins: bool, x_is_series, series_index, name, dtype
+):
     """
     handles post processing for the cut method where
     we combine the index information if the originally passed

From e7cd7d65e84f6b118ccab03f7ae5d532ba2a2823 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 2 Nov 2019 08:28:54 -0700
Subject: [PATCH 3/3] use lambda instead of partial to make mypy happy

---
 pandas/core/reshape/tile.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index c65f751d4ed36..09db840ca4db0 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -1,8 +1,6 @@
 """
 Quantilization functions and related stuff
 """
-from functools import partial
-
 import numpy as np

 from pandas._libs import Timedelta, Timestamp
@@ -513,7 +511,7 @@ def _format_labels(
     closed = "right" if right else "left"

     if is_datetime64tz_dtype(dtype):
-        formatter = partial(Timestamp, tz=dtype.tz)
+        formatter = lambda x: Timestamp(x, tz=dtype.tz)
         adjust = lambda x: x - Timedelta("1ns")
     elif is_datetime64_dtype(dtype):
         formatter = Timestamp
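
The third commit's message says only that swapping functools.partial for a lambda keeps mypy happy; the runtime behaviour is unchanged. As a rough standalone sketch (not part of the patch; the names below and the "UTC" value are illustrative stand-ins, with tz playing the role of dtype.tz), the two forms it switches between are equivalent:

# Standalone sketch, not part of the patch: the two runtime-equivalent forms
# the third commit switches between. `tz` stands in for `dtype.tz`.
from functools import partial

from pandas import Timestamp

tz = "UTC"  # placeholder for dtype.tz

formatter_partial = partial(Timestamp, tz=tz)     # form the patch removes
formatter_lambda = lambda x: Timestamp(x, tz=tz)  # form the patch adopts

# Both build the same tz-aware Timestamp at runtime.
assert formatter_lambda("2019-11-02") == formatter_partial("2019-11-02")

A plausible reason, not stated in the patch, is that _format_labels also assigns plain Timestamp to the same formatter variable in the is_datetime64_dtype branch, and mypy reconciles that assignment with an inferred lambda (callable) type more readily than with a functools.partial object.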