99
1010from pandas ._typing import (
1111 CompressionOptions ,
12+ ConvertersArg ,
13+ DtypeArg ,
1214 FilePath ,
15+ ParseDatesArg ,
1316 ReadBuffer ,
1417 StorageOptions ,
1518 XMLParsers ,
@@ -67,6 +70,23 @@ class _XMLFrameParser:
6770 names : list
6871 Column names for DataFrame of parsed XML data.
6972
73+ dtype : dict
74+ Data type for data or columns. E.g. {{'a': np.float64,
75+ 'b': np.int32, 'c': 'Int64'}}
76+
77+ .. versionadded:: 1.5.0
78+
79+ converters : dict, optional
80+ Dict of functions for converting values in certain columns. Keys can
81+ either be integers or column labels.
82+
83+ .. versionadded:: 1.5.0
84+
85+ parse_dates : bool or list of int or names or list of lists or dict
86+ Converts either the index or selected columns to datetimes.
87+
88+ .. versionadded:: 1.5.0
89+
7090 encoding : str
7191 Encoding of xml object or document.
7292
@@ -109,6 +129,9 @@ def __init__(
109129 elems_only : bool ,
110130 attrs_only : bool ,
111131 names : Sequence [str ] | None ,
132+ dtype : DtypeArg | None ,
133+ converters : ConvertersArg | None ,
134+ parse_dates : ParseDatesArg | None ,
112135 encoding : str | None ,
113136 stylesheet : FilePath | ReadBuffer [bytes ] | ReadBuffer [str ] | None ,
114137 compression : CompressionOptions ,
@@ -120,6 +143,9 @@ def __init__(
120143 self .elems_only = elems_only
121144 self .attrs_only = attrs_only
122145 self .names = names
146+ self .dtype = dtype
147+ self .converters = converters
148+ self .parse_dates = parse_dates
123149 self .encoding = encoding
124150 self .stylesheet = stylesheet
125151 self .is_style = None
@@ -671,6 +697,9 @@ def _parse(
671697 elems_only : bool ,
672698 attrs_only : bool ,
673699 names : Sequence [str ] | None ,
700+ dtype : DtypeArg | None ,
701+ converters : ConvertersArg | None ,
702+ parse_dates : ParseDatesArg | None ,
674703 encoding : str | None ,
675704 parser : XMLParsers ,
676705 stylesheet : FilePath | ReadBuffer [bytes ] | ReadBuffer [str ] | None ,
@@ -706,6 +735,9 @@ def _parse(
706735 elems_only ,
707736 attrs_only ,
708737 names ,
738+ dtype ,
739+ converters ,
740+ parse_dates ,
709741 encoding ,
710742 stylesheet ,
711743 compression ,
@@ -722,6 +754,9 @@ def _parse(
722754 elems_only ,
723755 attrs_only ,
724756 names ,
757+ dtype ,
758+ converters ,
759+ parse_dates ,
725760 encoding ,
726761 stylesheet ,
727762 compression ,
@@ -732,7 +767,13 @@ def _parse(
732767
733768 data_dicts = p .parse_data ()
734769
735- return _data_to_frame (data = data_dicts , ** kwargs )
770+ return _data_to_frame (
771+ data = data_dicts ,
772+ dtype = dtype ,
773+ converters = converters ,
774+ parse_dates = parse_dates ,
775+ ** kwargs ,
776+ )
736777
737778
738779@deprecate_nonkeyword_arguments (
@@ -749,6 +790,9 @@ def read_xml(
749790 elems_only : bool = False ,
750791 attrs_only : bool = False ,
751792 names : Sequence [str ] | None = None ,
793+ dtype : DtypeArg | None = None ,
794+ converters : ConvertersArg | None = None ,
795+ parse_dates : ParseDatesArg | None = None ,
752796 # encoding can not be None for lxml and StringIO input
753797 encoding : str | None = "utf-8" ,
754798 parser : XMLParsers = "lxml" ,
@@ -799,6 +843,35 @@ def read_xml(
799843 Column names for DataFrame of parsed XML data. Use this parameter to
800844 rename original element names and distinguish same named elements.
801845
846+ dtype : Type name or dict of column -> type, optional
847+ Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32,
848+ 'c': 'Int64'}}
849+ Use `str` or `object` together with suitable `na_values` settings
850+ to preserve and not interpret dtype.
851+ If converters are specified, they will be applied INSTEAD
852+ of dtype conversion.
853+
854+ .. versionadded:: 1.5.0
855+
856+ converters : dict, optional
857+ Dict of functions for converting values in certain columns. Keys can either
858+ be integers or column labels.
859+
860+ .. versionadded:: 1.5.0
861+
862+ parse_dates : bool or list of int or names or list of lists or dict, default None
863+ Identifiers to parse index or columns to datetime. The behavior is as follows:
864+
865+ * boolean. If True -> try parsing the index.
866+ * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
867+ each as a separate date column.
868+ * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as
869+ a single date column.
870+ * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
871+ result 'foo'
872+
873+ .. versionadded:: 1.5.0
874+
802875 encoding : str, optional, default 'utf-8'
803876 Encoding of XML document.
804877
@@ -942,6 +1015,9 @@ def read_xml(
9421015 elems_only = elems_only ,
9431016 attrs_only = attrs_only ,
9441017 names = names ,
1018+ dtype = dtype ,
1019+ converters = converters ,
1020+ parse_dates = parse_dates ,
9451021 encoding = encoding ,
9461022 parser = parser ,
9471023 stylesheet = stylesheet ,
0 commit comments