1111import numpy as np
1212
1313
14- def cut (x , bins , right = True , labels = None , retbins = False , precision = 3 ):
14+ def cut (x , bins , right = True , labels = None , retbins = False , precision = 3 ,
15+ include_lowest = False ):
1516 """
1617 Return indices of half-open bins to which each value of `x` belongs.
1718
@@ -38,9 +39,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
3839
3940 Returns
4041 -------
41- out : ndarray of labels
42- Same shape as `x`. Array of strings by default, integers if
43- labels=False
42+ out : Categorical or array of integers if labels is False
4443 bins : ndarray of floats
4544 Returned only if `retbins` is True.
4645
@@ -50,7 +49,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
5049 a categorical variable. For example, `cut` could convert ages to groups
5150 of age ranges.
5251
53- Any NA values will be NA in the result
52+ Any NA values will be NA in the result. Out of bounds values will be NA in
53+ the resulting Categorical object
54+
5455
5556 Examples
5657 --------
@@ -95,11 +96,12 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
9596 raise ValueError ('bins must increase monotonically.' )
9697
9798 return _bins_to_cuts (x , bins , right = right , labels = labels ,
98- retbins = retbins , precision = precision )
99+ retbins = retbins , precision = precision ,
100+ include_lowest = include_lowest )
99101
100102
101103
102- def qcut (x , q = 4 , labels = None , retbins = False , precision = 3 ):
104+ def qcut (x , q , labels = None , retbins = False , precision = 3 ):
103105 """
104106 Quantile-based discretization function. Discretize variable into
105107 equal-sized buckets based on rank or based on sample quantiles. For example
@@ -111,8 +113,7 @@ def qcut(x, q=4, labels=None, retbins=False, precision=3):
111113 x : ndarray or Series
112114 q : integer or array of quantiles
113115 Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
114- array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. Array of
115- quantiles must span [0, 1]
116+ array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles
116117 labels : array or boolean, default None
117118 Labels to use for bin edges, or False to return integer bin labels
118119 retbins : bool, optional
@@ -121,9 +122,11 @@ def qcut(x, q=4, labels=None, retbins=False, precision=3):
121122
122123 Returns
123124 -------
125+ cat : Categorical
124126
125127 Notes
126128 -----
129+ Out of bounds values will be NA in the resulting Categorical object
127130
128131 Examples
129132 --------
@@ -133,21 +136,22 @@ def qcut(x, q=4, labels=None, retbins=False, precision=3):
133136 else :
134137 quantiles = q
135138 bins = algos .quantile (x , quantiles )
136- bins [0 ] -= 0.001 * (x .max () - x .min ())
137-
138139 return _bins_to_cuts (x , bins , labels = labels , retbins = retbins ,
139- precision = precision )
140+ precision = precision , include_lowest = True )
140141
141142
142143def _bins_to_cuts (x , bins , right = True , labels = None , retbins = False ,
143- precision = 3 , name = None ):
144+ precision = 3 , name = None , include_lowest = False ):
144145 if name is None and isinstance (x , Series ):
145146 name = x .name
146147 x = np .asarray (x )
147148
148149 side = 'left' if right else 'right'
149150 ids = bins .searchsorted (x , side = side )
150151
152+ if include_lowest :
153+ ids [x == bins [0 ]] = 1
154+
151155 na_mask = com .isnull (x ) | (ids == len (bins )) | (ids == 0 )
152156 has_nas = na_mask .any ()
153157
@@ -157,9 +161,12 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
157161 if right :
158162 levels = ['(%s, %s]' % (fmt (a ), fmt (b ))
159163 for a , b in zip (bins , bins [1 :])]
164+ if include_lowest :
165+ levels [0 ] = '[' + levels [0 ][1 :]
160166 else :
161167 levels = ['[%s, %s)' % (fmt (a ), fmt (b ))
162168 for a , b in zip (bins , bins [1 :])]
169+
163170 else :
164171 if len (labels ) != len (bins ) - 1 :
165172 raise ValueError ('Bin labels must be one fewer than '
0 commit comments