2525 is_timedelta64_dtype ,
2626 is_categorical ,
2727 is_categorical_dtype ,
28- is_integer_dtype ,
2928 is_list_like , is_sequence ,
3029 is_scalar ,
3130 is_dict_like )
@@ -261,6 +260,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
261260 # c.) infer from values
262261
263262 if dtype is not None :
263+ # The dtype argument takes precedence over values.dtype (if any)
264264 if isinstance (dtype , compat .string_types ):
265265 if dtype == 'category' :
266266 dtype = CategoricalDtype (categories , ordered )
@@ -275,9 +275,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
275275 ordered = dtype .ordered
276276
277277 elif is_categorical (values ):
278+ # If no "dtype" was passed, use the one from "values", but honor
279+ # the "ordered" and "categories" arguments
278280 dtype = values .dtype ._from_categorical_dtype (values .dtype ,
279281 categories , ordered )
280282 else :
283+ # If dtype=None and values is not categorical, create a new dtype
281284 dtype = CategoricalDtype (categories , ordered )
282285
283286 # At this point, dtype is always a CategoricalDtype
@@ -294,28 +297,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
294297
295298 # sanitize input
296299 if is_categorical_dtype (values ):
300+ if dtype .categories is None :
301+ dtype = CategoricalDtype (values .categories , dtype .ordered )
297302
298- # we are either a Series or a CategoricalIndex
299- if isinstance (values , (ABCSeries , ABCCategoricalIndex )):
300- values = values ._values
301-
302- if ordered is None :
303- ordered = values .ordered
304- if categories is None :
305- categories = values .categories
306- values = values .get_values ()
307-
308- elif isinstance (values , (ABCIndexClass , ABCSeries )):
309- # we'll do inference later
310- pass
311-
312- else :
313-
314- # on numpy < 1.6 datetimelike get inferred to all i8 by
315- # _sanitize_array which is fine, but since factorize does this
316- # correctly no need here this is an issue because _sanitize_array
317- # also coerces np.nan to a string under certain versions of numpy
318- # as well
303+ elif not isinstance (values , (ABCIndexClass , ABCSeries )):
304+ # _sanitize_array coerces np.nan to a string under certain versions
305+ # of numpy
319306 values = maybe_infer_to_datetimelike (values , convert_dates = True )
320307 if not isinstance (values , np .ndarray ):
321308 values = _convert_to_list_like (values )
@@ -335,7 +322,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
335322 codes , categories = factorize (values , sort = True )
336323 except TypeError :
337324 codes , categories = factorize (values , sort = False )
338- if ordered :
325+ if dtype . ordered :
339326 # raise, as we don't have a sortable data structure and so
340327 # the user should give us one by specifying categories
341328 raise TypeError ("'values' is not ordered, please "
@@ -347,34 +334,18 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
347334 raise NotImplementedError ("> 1 ndim Categorical are not "
348335 "supported at this time" )
349336
350- if dtype .categories is None :
351- # we're inferring from values
352- dtype = CategoricalDtype (categories , ordered )
337+ # we're inferring from values
338+ dtype = CategoricalDtype (categories , dtype .ordered )
353339
354- else :
355- # there were two ways if categories are present
356- # - the old one, where each value is an int pointer to the levels
357- # array -> no longer possible, but code outside of pandas could
358- # call us like that, so make some checks
359- # - the new one, where each value is also in the categories array
360- # (or np.nan)
340+ elif is_categorical_dtype (values ):
341+ old_codes = (values .cat .codes if isinstance (values , ABCSeries )
342+ else values .codes )
343+ codes = _recode_for_categories (old_codes , values .dtype .categories ,
344+ dtype .categories )
361345
346+ else :
362347 codes = _get_codes_for_values (values , dtype .categories )
363348
364- # TODO: check for old style usage. These warnings should be removed
365- # after 0.18/ in 2016
366- if (is_integer_dtype (values ) and
367- not is_integer_dtype (dtype .categories )):
368- warn ("Values and categories have different dtypes. Did you "
369- "mean to use\n 'Categorical.from_codes(codes, "
370- "categories)'?" , RuntimeWarning , stacklevel = 2 )
371-
372- if (len (values ) and is_integer_dtype (values ) and
373- (codes == - 1 ).all ()):
374- warn ("None of the categories were found in values. Did you "
375- "mean to use\n 'Categorical.from_codes(codes, "
376- "categories)'?" , RuntimeWarning , stacklevel = 2 )
377-
378349 if null_mask .any ():
379350 # Reinsert -1 placeholders for previously removed missing values
380351 full_codes = - np .ones (null_mask .shape , dtype = codes .dtype )
0 commit comments