66"""
77
88from datetime import datetime
9+ import nose
910
1011import pandas .util .testing as tm
1112
@@ -22,9 +23,8 @@ def test_raise_on_mixed_dtype_usecols(self):
2223 1000,2000,3000
2324 4000,5000,6000
2425 """
25- msg = ("The elements of \' usecols\' "
26- "must either be all strings "
27- "or all integers" )
26+ msg = ("The elements of 'usecols' must "
27+ "either be all strings, all unicode, or all integers" )
2828 usecols = [0 , 'b' , 2 ]
2929
3030 with tm .assertRaisesRegexp (ValueError , msg ):
@@ -254,3 +254,103 @@ def test_usecols_with_parse_dates_and_usecol_names(self):
254254 usecols = [3 , 0 , 2 ],
255255 parse_dates = parse_dates )
256256 tm .assert_frame_equal (df , expected )
257+
def test_usecols_with_unicode_strings(self):
    # see gh-13219
    #
    # Column names passed to ``usecols`` as unicode strings must select
    # the matching columns from the parsed CSV.
    csv_text = (
        'AAA,BBB,CCC,DDD\n'
        '0.056674973,8,True,a\n'
        '2.613230982,2,False,b\n'
        '3.568935038,7,False,a\n'
    )
    wanted_columns = [u'AAA', u'BBB']

    # Only the two requested columns are expected in the result.
    expected = DataFrame({
        'AAA': {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        'BBB': {0: 8, 1: 2, 2: 7}
    })

    result = self.read_csv(StringIO(csv_text), usecols=wanted_columns)
    tm.assert_frame_equal(result, expected)
279+
def test_usecols_with_single_byte_unicode_strings(self):
    # see gh-13219
    #
    # Same check as the multi-character case, but with one-character
    # column names passed to ``usecols`` as unicode strings.
    csv_text = (
        'A,B,C,D\n'
        '0.056674973,8,True,a\n'
        '2.613230982,2,False,b\n'
        '3.568935038,7,False,a\n'
    )
    wanted_columns = [u'A', u'B']

    # Only the two requested columns are expected in the result.
    expected = DataFrame({
        'A': {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        'B': {0: 8, 1: 2, 2: 7}
    })

    result = self.read_csv(StringIO(csv_text), usecols=wanted_columns)
    tm.assert_frame_equal(result, expected)
301+
def test_usecols_with_mixed_encoding_strings(self):
    # A ``usecols`` list that mixes unicode and byte strings must be
    # rejected with a ValueError, whichever kind comes first.
    csv_text = (
        'AAA,BBB,CCC,DDD\n'
        '0.056674973,8,True,a\n'
        '2.613230982,2,False,b\n'
        '3.568935038,7,False,a\n'
    )

    msg = ("The elements of 'usecols' must "
           "either be all strings, all unicode, or all integers")

    # Unicode-then-bytes first, then bytes-then-unicode.
    mixed_selections = (
        [u'AAA', b'BBB'],
        [b'AAA', u'BBB'],
    )
    for mixed in mixed_selections:
        with tm.assertRaisesRegexp(ValueError, msg):
            self.read_csv(StringIO(csv_text), usecols=mixed)
317+
def test_usecols_with_multibyte_characters(self):
    # Column names made of multibyte characters, passed to ``usecols``
    # as plain (unprefixed) string literals, must select the matching
    # columns.
    csv_text = (
        'あああ,いい,ううう,ええええ\n'
        '0.056674973,8,True,a\n'
        '2.613230982,2,False,b\n'
        '3.568935038,7,False,a\n'
    )
    wanted_columns = ['あああ', 'いい']

    # Only the two requested columns are expected in the result.
    expected = DataFrame({
        'あああ': {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        'いい': {0: 8, 1: 2, 2: 7}
    })

    result = self.read_csv(StringIO(csv_text), usecols=wanted_columns)
    tm.assert_frame_equal(result, expected)
336+
def test_usecols_with_multibyte_unicode_characters(self):
    # Skipped unconditionally for now; nothing below the raise runs.
    # The body is kept so the test can be enabled once the TODO is
    # resolved.
    raise nose.SkipTest('TODO: see gh-13253')

    csv_text = (
        'あああ,いい,ううう,ええええ\n'
        '0.056674973,8,True,a\n'
        '2.613230982,2,False,b\n'
        '3.568935038,7,False,a\n'
    )
    wanted_columns = [u'あああ', u'いい']

    # Only the two requested columns are expected in the result.
    expected = DataFrame({
        'あああ': {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        'いい': {0: 8, 1: 2, 2: 7}
    })

    result = self.read_csv(StringIO(csv_text), usecols=wanted_columns)
    tm.assert_frame_equal(result, expected)
0 commit comments