@@ -432,7 +432,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
432432 d = parse_dates_safe (dates , year = True )
433433 conv_dates = d .year
434434 else :
435- raise ValueError ("fmt %s not understood" % fmt )
435+ raise NotImplementedError ("Conversion from format %s "
436+ "is not implemented" % fmt )
436437
437438 conv_dates = Series (conv_dates , dtype = np .float64 )
438439 missing_value = struct .unpack ('<d' , b'\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \x7f ' )[0 ]
@@ -1709,7 +1710,7 @@ def _convert_datetime_to_stata_type(fmt):
17091710 "%tq" , "th" , "%th" , "ty" , "%ty" ]:
17101711 return np .float64 # Stata expects doubles for SIFs
17111712 else :
1712- raise ValueError ( "fmt %s not understood " % fmt )
1713+ raise NotImplementedError ( "Format %s not implemented " % fmt )
17131714
17141715
17151716def _maybe_convert_to_int_keys (convert_dates , varlist ):
@@ -1721,9 +1722,8 @@ def _maybe_convert_to_int_keys(convert_dates, varlist):
17211722 new_dict .update ({varlist .index (key ): convert_dates [key ]})
17221723 else :
17231724 if not isinstance (key , int ):
1724- raise ValueError (
1725- "convert_dates key is not in varlist and is not an int"
1726- )
1725+ raise ValueError ("convert_dates key must be a "
1726+ "column or an integer" )
17271727 new_dict .update ({key : convert_dates [key ]})
17281728 return new_dict
17291729
@@ -1763,8 +1763,7 @@ def _dtype_to_stata_type(dtype, column):
17631763 elif dtype == np .int8 :
17641764 return chr (251 )
17651765 else : # pragma : no cover
1766- raise ValueError ("Data type %s not currently understood. "
1767- "Please report an error to the developers." % dtype )
1766+ raise NotImplementedError ("Data type %s not supported." % dtype )
17681767
17691768
17701769def _dtype_to_default_stata_fmt (dtype , column ):
@@ -1801,35 +1800,36 @@ def _dtype_to_default_stata_fmt(dtype, column):
18011800 elif dtype == np .int8 or dtype == np .int16 :
18021801 return "%8.0g"
18031802 else : # pragma : no cover
1804- raise ValueError ("Data type %s not currently understood. "
1805- "Please report an error to the developers." % dtype )
1803+ raise NotImplementedError ("Data type %s not supported." % dtype )
18061804
18071805
18081806class StataWriter (StataParser ):
18091807 """
1810- A class for writing Stata binary dta files from array-like objects
1808+ A class for writing Stata binary dta files
18111809
18121810 Parameters
18131811 ----------
1814- fname : file path or buffer
1815- Where to save the dta file.
1816- data : array-like
1817- Array-like input to save. Pandas objects are also accepted.
1812+ fname : str or buffer
1813+ String path or file-like object
1814+ data : DataFrame
1815+ Input to save
18181816 convert_dates : dict
1819- Dictionary mapping column of datetime types to the stata internal
1820- format that you want to use for the dates. Options are
1821- 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either a
1822- number or a name.
1817+ Dictionary mapping columns containing datetime types to Stata internal
1818+ format to use when writing the dates. Options are 'tc', 'td', 'tm',
1819+ 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name.
1820+ Datetime columns that do not have a conversion type specified will be
1821+ converted to 'tc'. Datetime columns with timezone information are not
1822+ supported.
1823+ write_index : bool
1824+ Write the index to Stata dataset.
18231825 encoding : str
1824- Default is latin-1. Note that Stata does not support unicode.
1826+ Default is latin-1. Unicode is not supported
18251827 byteorder : str
1826- Can be ">", "<", "little", or "big". The default is None which uses
1827- `sys.byteorder`
1828+ Can be ">", "<", "little", or "big". Default is `sys.byteorder`
18281829 time_stamp : datetime
1829- A date time to use when writing the file. Can be None, in which
1830- case the current time is used.
1830+ A datetime to use as file creation date. Default is the current time
18311831 data_label : str
1832- A label for the data set. Should be 80 characters or smaller.
1832+ A label for the data set. Must be 80 characters or smaller.
18331833
18341834 .. versionadded:: 0.19.0
18351835
@@ -1843,6 +1843,17 @@ class StataWriter(StataParser):
18431843 The StataWriter instance has a write_file method, which will
18441844 write the file to the given `fname`.
18451845
1846+ Raises
1847+ ------
1848+ NotImplementedError
1849+ * If datetimes contain timezone information
1850+ * Column dtype is not representable in Stata
1851+ ValueError
1852+ * Columns listed in convert_dates contain values other than
1853+ datetime64[ns] or datetime.datetime
1854+ * Column listed in convert_dates is not in DataFrame
1855+ * Categorical label contains more than 32,000 characters
1856+
18461857 Examples
18471858 --------
18481859 >>> import pandas as pd
@@ -1861,7 +1872,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True,
18611872 encoding = "latin-1" , byteorder = None , time_stamp = None ,
18621873 data_label = None , variable_labels = None ):
18631874 super (StataWriter , self ).__init__ (encoding )
1864- self ._convert_dates = convert_dates
1875+ self ._convert_dates = {} if convert_dates is None else convert_dates
18651876 self ._write_index = write_index
18661877 self ._time_stamp = time_stamp
18671878 self ._data_label = data_label
@@ -2041,15 +2052,22 @@ def _prepare_pandas(self, data):
20412052 self .varlist = data .columns .tolist ()
20422053
20432054 dtypes = data .dtypes
2044- if self ._convert_dates is not None :
2045- self ._convert_dates = _maybe_convert_to_int_keys (
2046- self ._convert_dates , self .varlist
2055+
2056+ # Ensure all date columns are converted
2057+ for col in data :
2058+ if col in self ._convert_dates :
2059+ continue
2060+ if is_datetime64_dtype (data [col ]):
2061+ self ._convert_dates [col ] = 'tc'
2062+
2063+ self ._convert_dates = _maybe_convert_to_int_keys (self ._convert_dates ,
2064+ self .varlist )
2065+ for key in self ._convert_dates :
2066+ new_type = _convert_datetime_to_stata_type (
2067+ self ._convert_dates [key ]
20472068 )
2048- for key in self ._convert_dates :
2049- new_type = _convert_datetime_to_stata_type (
2050- self ._convert_dates [key ]
2051- )
2052- dtypes [key ] = np .dtype (new_type )
2069+ dtypes [key ] = np .dtype (new_type )
2070+
20532071 self .typlist = []
20542072 self .fmtlist = []
20552073 for col , dtype in dtypes .iteritems ():
0 commit comments