diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1b871bf0b745f..b7d5b907129c8 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1284,13 +1284,12 @@ cdef class Seen: @property def is_bool(self): - return not (self.datetime_ or self.numeric_ or self.timedelta_ - or self.nat_) - - @property - def is_float_or_complex(self): - return not (self.bool_ or self.datetime_ or self.timedelta_ - or self.nat_) + # i.e. not (anything but bool) + return not ( + self.datetime_ or self.datetimetz_ or self.timedelta_ or self.nat_ + or self.period_ or self.interval_ + or self.numeric_ or self.nan_ or self.null_ or self.object_ + ) cdef object _try_infer_map(object dtype): @@ -2641,104 +2640,83 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True + if seen.nat_: + if seen.object_: + result = objects + elif seen.bool_: + result = objects + elif seen.null_: + result = objects + elif not safe and seen.nan_: + result = objects + elif seen.numeric_: + result = objects + else: + if convert_datetime and convert_timedelta: + dtype = dtype_if_all_nat + if dtype is not None: + # otherwise we keep object dtype + result = _infer_all_nats( + dtype, objects.shape + ) + else: + result = objects + elif convert_datetime: + result = datetimes + elif convert_timedelta: + result = timedeltas + else: + result = objects + return result + return result + + if seen.bool_: + if seen.is_bool: + # is_bool property rules out everything else + return bools.view(np.bool_) + seen.object_ = True + if not seen.object_: result = None if not safe: - if seen.null_ or seen.nan_: - if seen.is_float_or_complex: - if seen.complex_: - result = complexes - elif seen.float_: - result = floats - elif seen.int_: - if convert_to_nullable_integer: - from pandas.core.arrays import IntegerArray - result = IntegerArray(ints, mask) - else: - result = floats - elif seen.nan_: + if seen.complex_: + result = complexes + elif seen.float_: + result = floats + elif seen.null_ or seen.nan_: + if seen.int_: + if convert_to_nullable_integer: + from pandas.core.arrays import IntegerArray + result = IntegerArray(ints, mask) + else: result = floats + elif seen.nan_: + result = floats else: - if not seen.bool_: - if seen.datetime_: - if not seen.numeric_ and not seen.timedelta_: - result = datetimes - elif seen.timedelta_: - if not seen.numeric_: - result = timedeltas - elif seen.nat_: - if not seen.numeric_: - if convert_datetime and convert_timedelta: - dtype = dtype_if_all_nat - if dtype is not None: - # otherwise we keep object dtype - result = _infer_all_nats( - dtype, datetimes, timedeltas - ) - - elif convert_datetime: - result = datetimes - elif convert_timedelta: - result = timedeltas + if seen.int_: + if seen.uint_: + result = uints else: - if seen.complex_: - result = complexes - elif seen.float_: - result = floats - elif seen.int_: - if seen.uint_: - result = uints - else: - result = ints - elif seen.is_bool: - result = bools.view(np.bool_) + result = ints else: # don't cast int to float, etc. - if seen.null_: - if seen.is_float_or_complex: - if seen.complex_: - if not seen.int_: - result = complexes - elif seen.float_ or seen.nan_: - if not seen.int_: - result = floats - else: - if not seen.bool_: - if seen.datetime_: - if not seen.numeric_ and not seen.timedelta_: - result = datetimes - elif seen.timedelta_: - if not seen.numeric_: - result = timedeltas - elif seen.nat_: - if not seen.numeric_: - if convert_datetime and convert_timedelta: - dtype = dtype_if_all_nat - if dtype is not None: - # otherwise we keep object dtype - result = _infer_all_nats( - dtype, datetimes, timedeltas - ) - - elif convert_datetime: - result = datetimes - elif convert_timedelta: - result = timedeltas + if seen.int_: + if seen.null_ or seen.nan_ or seen.float_ or seen.complex_: + # we have seen something other than int, so we do not + # convert with safe=True. + pass + else: + if seen.uint_: + result = uints else: - if seen.complex_: - if not seen.int_: - result = complexes - elif seen.float_ or seen.nan_: - if not seen.int_: - result = floats - elif seen.int_: - if seen.uint_: - result = uints - else: - result = ints - elif seen.is_bool and not seen.nan_: - result = bools.view(np.bool_) + result = ints + + else: + if seen.complex_: + result = complexes + elif seen.float_ or seen.nan_: + result = floats if result is uints or result is ints or result is floats or result is complexes: # cast to the largest itemsize when all values are NumPy scalars @@ -2751,22 +2729,24 @@ def maybe_convert_objects(ndarray[object] objects, return objects -cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas): +cdef _infer_all_nats(dtype, cnp.npy_intp* shape): """ If we have all-NaT values, cast these to the given dtype. """ if cnp.PyArray_DescrCheck(dtype): # i.e. isinstance(dtype, np.dtype): - if dtype == "M8[ns]": - result = datetimes - elif dtype == "m8[ns]": - result = timedeltas + if dtype == "M8[ns]" or dtype == "m8[ns]": + pass else: raise ValueError(dtype) + + i8vals = cnp.PyArray_EMPTY(1, shape, cnp.NPY_INT64, 0) + i8vals.fill(NPY_NAT) + result = i8vals.view(dtype) else: # ExtensionDtype cls = dtype.construct_array_type() - i8vals = cnp.PyArray_EMPTY(1, datetimes.shape, cnp.NPY_INT64, 0) + i8vals = cnp.PyArray_EMPTY(1, shape, cnp.NPY_INT64, 0) i8vals.fill(NPY_NAT) result = cls(i8vals, dtype=dtype) return result diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 10c2349f05dfd..513c723546f9b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1361,6 +1361,8 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t] if is_object_dtype(values.dtype): values = cast(np.ndarray, values) + # Only place where we pass safe=True, only needed for + # test_format_missing values = lib.maybe_convert_objects(values, safe=True) result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]