@@ -5702,6 +5702,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
57025702 to_datetime : Convert argument to datetime.
57035703 to_timedelta : Convert argument to timedelta.
57045704 to_numeric : Convert argument to numeric type.
5705+ convert_dtypes : Convert argument to best possible dtype.
57055706
57065707 Examples
57075708 --------
@@ -5730,6 +5731,142 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
57305731 )
57315732 ).__finalize__ (self )
57325733
def convert_dtypes(
    self: FrameOrSeries,
    infer_objects: bool_t = True,
    convert_string: bool_t = True,
    convert_integer: bool_t = True,
    convert_boolean: bool_t = True,
) -> FrameOrSeries:
    """
    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

    .. versionadded:: 1.1.0

    Parameters
    ----------
    infer_objects : bool, default True
        Whether object dtypes should be converted to the best possible types.
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, default True
        Whether object dtypes should be converted to ``BooleanDtype()``.

    Returns
    -------
    Series or DataFrame
        Copy of input object with new dtype. A DataFrame without any columns
        is returned as a plain copy.

    See Also
    --------
    infer_objects : Infer dtypes of objects.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    to_numeric : Convert argument to a numeric type.

    Notes
    -----

    By default, ``convert_dtypes`` will attempt to convert a Series (or each
    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
    ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
    possible to turn off individual conversions to ``StringDtype``, the integer
    extension types or ``BooleanDtype``, respectively.

    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
    rules as during normal Series/DataFrame construction. Then, if possible,
    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
    type, otherwise leave as ``object``.

    If the dtype is integer, convert to an appropriate integer extension type.

    If the dtype is numeric, and consists of all integers, convert to an
    appropriate integer extension type.

    In the future, as new dtypes are added that support ``pd.NA``, the results
    of this method will change to support those new dtypes.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
    ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
    ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
    ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
    ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
    ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    ...     }
    ... )

    Start with a DataFrame with default dtypes.

    >>> df
       a  b      c    d     e      f
    0  1  x   True    h  10.0    NaN
    1  2  y  False    i   NaN  100.5
    2  3  z    NaN  NaN  20.0  200.0

    >>> df.dtypes
    a      int32
    b     object
    c     object
    d     object
    e    float64
    f    float64
    dtype: object

    Convert the DataFrame to use best possible dtypes.

    >>> dfn = df.convert_dtypes()
    >>> dfn
       a  b      c     d     e      f
    0  1  x   True     h    10    NaN
    1  2  y  False     i  <NA>  100.5
    2  3  z   <NA>  <NA>    20  200.0

    >>> dfn.dtypes
    a      Int32
    b     string
    c    boolean
    d     string
    e      Int64
    f    float64
    dtype: object

    Start with a Series of strings and missing data represented by ``np.nan``.

    >>> s = pd.Series(["a", "b", np.nan])
    >>> s
    0      a
    1      b
    2    NaN
    dtype: object

    Obtain a Series with dtype ``StringDtype``.

    >>> s.convert_dtypes()
    0       a
    1       b
    2    <NA>
    dtype: string
    """
    if self.ndim == 1:
        # One-dimensional input: delegate directly to the per-object converter.
        return self._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )

    # Two-dimensional input: convert each column independently, then
    # reassemble the converted columns side by side.
    results = [
        col._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )
        for _, col in self.items()
    ]
    if not results:
        # ``pd.concat`` raises ValueError when given nothing to concatenate,
        # so a frame without columns is handed back as a copy instead.
        return self.copy()
    return pd.concat(results, axis=1, copy=False)
5869+
57335870 # ----------------------------------------------------------------------
57345871 # Filling NA's
57355872
0 commit comments