1313import pandas as pd
1414from pandas .core .frame import DataFrame , Series
1515from pandas .io .parsers import read_csv
16- from pandas .io .stata import read_stata , StataReader , InvalidColumnName
16+ from pandas .io .stata import (read_stata , StataReader , InvalidColumnName ,
17+ PossiblePrecisionLoss )
1718import pandas .util .testing as tm
1819from pandas .util .misc import is_little_endian
1920from pandas import compat
@@ -142,8 +143,7 @@ def test_read_dta2(self):
142143 parsed_117 = self .read_dta (self .dta2_117 )
143144 # 113 is buggy due to limited date format support in Stata
144145 # parsed_113 = self.read_dta(self.dta2_113)
145-
146- np .testing .assert_equal (
146+ tm .assert_equal (
147147 len (w ), 1 ) # should get a warning for that format.
148148
149149 # buggy test because of the NaT comparison on certain platforms
@@ -206,7 +206,7 @@ def test_read_write_dta5(self):
206206 original .index .name = 'index'
207207
208208 with tm .ensure_clean () as path :
209- original .to_stata (path , None , False )
209+ original .to_stata (path , None )
210210 written_and_read_again = self .read_dta (path )
211211 tm .assert_frame_equal (written_and_read_again .set_index ('index' ),
212212 original )
@@ -221,7 +221,7 @@ def test_write_dta6(self):
221221 original ['quarter' ] = original ['quarter' ].astype (np .int32 )
222222
223223 with tm .ensure_clean () as path :
224- original .to_stata (path , None , False )
224+ original .to_stata (path , None )
225225 written_and_read_again = self .read_dta (path )
226226 tm .assert_frame_equal (written_and_read_again .set_index ('index' ),
227227 original )
@@ -257,7 +257,7 @@ def test_read_write_dta10(self):
257257 original ['integer' ] = original ['integer' ].astype (np .int32 )
258258
259259 with tm .ensure_clean () as path :
260- original .to_stata (path , {'datetime' : 'tc' }, False )
260+ original .to_stata (path , {'datetime' : 'tc' })
261261 written_and_read_again = self .read_dta (path )
262262 tm .assert_frame_equal (written_and_read_again .set_index ('index' ),
263263 original )
@@ -295,9 +295,9 @@ def test_read_write_dta11(self):
295295
296296 with tm .ensure_clean () as path :
297297 with warnings .catch_warnings (record = True ) as w :
298- original .to_stata (path , None , False )
299- np . testing . assert_equal (
300- len (w ), 1 ) # should get a warning for that format.
298+ original .to_stata (path , None )
299+ # should get a warning for that format.
300+ tm . assert_equal ( len (w ), 1 )
301301
302302 written_and_read_again = self .read_dta (path )
303303 tm .assert_frame_equal (written_and_read_again .set_index ('index' ), formatted )
@@ -324,13 +324,12 @@ def test_read_write_dta12(self):
324324
325325 with tm .ensure_clean () as path :
326326 with warnings .catch_warnings (record = True ) as w :
327- original .to_stata (path , None , False )
328- np .testing .assert_equal (
329- len (w ), 1 ) # should get a warning for that format.
327+ original .to_stata (path , None )
328+ tm .assert_equal (len (w ), 1 ) # should get a warning for that format.
330329
331330 written_and_read_again = self .read_dta (path )
332331 tm .assert_frame_equal (written_and_read_again .set_index ('index' ), formatted )
333-
332+
334333 def test_read_write_dta13 (self ):
335334 s1 = Series (2 ** 9 , dtype = np .int16 )
336335 s2 = Series (2 ** 17 , dtype = np .int32 )
@@ -366,7 +365,7 @@ def test_read_write_reread_dta14(self):
366365 tm .assert_frame_equal (parsed_114 , parsed_115 )
367366
368367 with tm .ensure_clean () as path :
369- parsed_114 .to_stata (path , {'date_td' : 'td' }, write_index = False )
368+ parsed_114 .to_stata (path , {'date_td' : 'td' })
370369 written_and_read_again = self .read_dta (path )
371370 tm .assert_frame_equal (written_and_read_again .set_index ('index' ), parsed_114 )
372371
@@ -406,7 +405,7 @@ def test_numeric_column_names(self):
406405 with warnings .catch_warnings (record = True ) as w :
407406 tm .assert_produces_warning (original .to_stata (path ), InvalidColumnName )
408407 # should produce a single warning
409- np . testing .assert_equal (len (w ), 1 )
408+ tm .assert_equal (len (w ), 1 )
410409
411410 written_and_read_again = self .read_dta (path )
412411 written_and_read_again = written_and_read_again .set_index ('index' )
@@ -415,7 +414,102 @@ def test_numeric_column_names(self):
415414 written_and_read_again .columns = map (convert_col_name , columns )
416415 tm .assert_frame_equal (original , written_and_read_again )
417416
def test_nan_to_missing_value(self):
    """Round-trip float32 and float64 columns containing NaN via to_stata."""
    single = Series(np.arange(4.0), dtype=np.float32)
    double = Series(np.arange(4.0), dtype=np.float64)
    # NaNs sit at alternating positions: even rows in s1, odd rows in s2.
    single[::2] = np.nan
    double[1::2] = np.nan

    original = DataFrame({'s1': single, 's2': double})
    original.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(reread, original)
429+
def test_no_index(self):
    """Check that ``write_index=False`` leaves the index out of the file."""
    values = np.arange(10.0).reshape((5, 2))
    original = DataFrame(values, columns=['x', 'y'])
    original.index.name = 'index_not_written'

    with tm.ensure_clean() as path:
        original.to_stata(path, write_index=False)
        reread = self.read_dta(path)

        def lookup_index_column():
            # The index name must not exist as a column after the round trip.
            return reread['index_not_written']

        tm.assertRaises(KeyError, lookup_index_column)
440+
def test_string_no_dates(self):
    """Round-trip a string column and a float column, with no date
    conversions passed to to_stata."""
    columns = {
        's1': Series(['a', 'A longer string']),
        's2': Series([1.0, 2.0], dtype=np.float64),
    }
    original = DataFrame(columns)
    original.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(reread, original)
451+
def test_large_value_conversion(self):
    """Write integer columns holding large values and check the dtypes
    that come back.

    The frame read back is expected to keep ``s0`` as written, while
    ``s1`` comes back as int16, ``s2`` as int32 and ``s3`` as float64.
    Writing must emit exactly one ``PossiblePrecisionLoss`` warning.
    """
    s0 = Series([1, 99], dtype=np.int8)
    s1 = Series([1, 127], dtype=np.int8)
    s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
    s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
    original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
    original.index.name = 'index'
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            # Call to_stata directly and inspect the recorded warnings;
            # tm.assert_produces_warning is a context manager and does
            # nothing when merely called with the result of to_stata.
            original.to_stata(path)
            # should produce a single warning ...
            tm.assert_equal(len(w), 1)
            # ... and it must be of the expected category
            assert issubclass(w[0].category, PossiblePrecisionLoss)

        written_and_read_again = self.read_dta(path)
        modified = original.copy()
        modified['s1'] = Series(modified['s1'], dtype=np.int16)
        modified['s2'] = Series(modified['s2'], dtype=np.int32)
        modified['s3'] = Series(modified['s3'], dtype=np.float64)
        tm.assert_frame_equal(written_and_read_again.set_index('index'),
                              modified)
473+
def test_dates_invalid_column(self):
    """Write a datetime column whose label is the integer ``0``.

    Writing must emit exactly one ``InvalidColumnName`` warning, and the
    frame read back is expected to carry the column under the name
    ``'_0'``.
    """
    original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)])
    original.index.name = 'index'
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            # Call to_stata directly and inspect the recorded warnings;
            # tm.assert_produces_warning is a context manager and does
            # nothing when merely called with the result of to_stata.
            original.to_stata(path, {0: 'tc'})
            tm.assert_equal(len(w), 1)
            assert issubclass(w[0].category, InvalidColumnName)

        written_and_read_again = self.read_dta(path)
        modified = original.copy()
        modified.columns = ['_0']
        tm.assert_frame_equal(written_and_read_again.set_index('index'),
                              modified)
488+
def test_date_export_formats(self):
    """Export one timestamp under each date format code and compare the
    values read back with the expected per-format dates."""
    formats = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']
    stamp = datetime(2006, 11, 20, 23, 13, 20)

    # One row, one column per format; each column is named after the
    # format it is converted with.
    original = DataFrame([[stamp] * len(formats)], columns=formats)
    original.index.name = 'index'
    convert_dates = dict(zip(formats, formats))

    expected_row = [
        datetime(2006, 11, 20, 23, 13, 20),  # Time
        datetime(2006, 11, 20),              # Day
        datetime(2006, 11, 19),              # Week
        datetime(2006, 11, 1),               # Month
        datetime(2006, 10, 1),               # Quarter year
        datetime(2006, 7, 1),                # Half year
        datetime(2006, 1, 1),                # Year
    ]
    expected = DataFrame([expected_row], columns=formats)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, convert_dates)
        reread = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(reread, expected)
510+
418511
if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
515+
0 commit comments