11import operator
2- from typing import TYPE_CHECKING , Type , Union
2+ from typing import TYPE_CHECKING , Optional , Type , Union
33
44import numpy as np
55
@@ -122,6 +122,9 @@ class StringArray(PandasArray):
122122
123123 copy : bool, default False
124124 Whether to copy the array of data.
125+ convert : bool, default False
126+ If True, force conversion of non-NA scalars to strings.
127+ If False, raise a ValueError if a scalar is neither a string nor NA.
125128
126129 Attributes
127130 ----------
@@ -162,7 +165,15 @@ class StringArray(PandasArray):
162165 ['1', '1']
163166 Length: 2, dtype: string
164167
165- However, instantiating StringArrays directly with non-strings will raise an error.
168+ Instantiating StringArrays directly with non-strings will raise an error unless
169+ ``convert=True``.
170+
171+ >>> pd.arrays.StringArray(['1', 1])
172+ TypeError: Argument 'values' has incorrect type (expected numpy.ndarray, got list)
173+ >>> pd.arrays.StringArray(['1', 1], convert=True)
174+ <StringArray>
175+ ['1', '1']
176+ Length: 2, dtype: string
166177
167178 For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
168179
@@ -175,22 +186,30 @@ class StringArray(PandasArray):
175186 # undo the PandasArray hack
176187 _typ = "extension"
177188
178- def __init__ (self , values , copy = False ):
189+ def __init__ (self , values , copy = False , convert : bool = False ):
179190 values = extract_array (values )
191+ if not isinstance (values , type (self )):
192+ if convert :
193+ values = lib .ensure_string_array (
194+ values , na_value = StringDtype .na_value , copy = copy
195+ )
196+ else :
197+ self ._validate (values )
180198
181199 super ().__init__ (values , copy = copy )
182200 self ._dtype = StringDtype ()
183- if not isinstance (values , type (self )):
184- self ._validate ()
185201
186- def _validate (self ) :
202+ def _validate (self , values : Optional [ np . ndarray ] = None ) -> None :
187203 """Validate that we only store NA or strings."""
188- if len (self ._ndarray ) and not lib .is_string_array (self ._ndarray , skipna = True ):
204+ if values is None :
205+ values = self ._ndarray
206+
207+ if len (values ) and not lib .is_string_array (values , skipna = True ):
189208 raise ValueError ("StringArray requires a sequence of strings or pandas.NA" )
190- if self . _ndarray .dtype != "object" :
209+ if values .dtype != "object" :
191210 raise ValueError (
192211 "StringArray requires a sequence of strings or pandas.NA. Got "
193- f"'{ self . _ndarray .dtype } ' dtype instead."
212+ f"'{ values .dtype } ' dtype instead."
194213 )
195214
196215 @classmethod
@@ -200,12 +219,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
200219
201220 result = np .asarray (scalars , dtype = "object" )
202221
203- # convert non-na-likes to str, and nan-likes to StringDtype.na_value
204- result = lib .ensure_string_array (
205- result , na_value = StringDtype .na_value , copy = copy
206- )
207-
208- return cls (result )
222+ return cls (result , copy = copy , convert = True )
209223
210224 @classmethod
211225 def _from_sequence_of_strings (cls , strings , dtype = None , copy = False ):
0 commit comments