@@ -987,24 +987,42 @@ def _evaluate_usecols(usecols, names):
987987
988988def _validate_usecols_arg (usecols ):
989989 """
990- Check whether or not the 'usecols' parameter
991- contains all integers (column selection by index),
992- strings (column by name) or is a callable. Raises
993- a ValueError if that is not the case.
990+ Validate the 'usecols' parameter.
991+
992+ Checks whether or not the 'usecols' parameter contains all integers
993+ (column selection by index), strings (column by name) or is a callable.
994+ Raises a ValueError if that is not the case.
995+
996+ Parameters
997+ ----------
998+ usecols : array-like, callable, or None
999+ List of columns to use when parsing or a callable that can be used
1000+ to filter a list of table columns.
1001+
1002+ Returns
1003+ -------
1004+ usecols_tuple : tuple
1005+ A tuple of (verified_usecols, usecols_dtype).
1006+
1007+ 'verified_usecols' is either a set if an array-like is passed in or
1008+ 'usecols' if a callable or None is passed in.
1009+
1010+ 'usecols_dtype' is the inferred dtype of 'usecols' if an array-like
1011+ is passed in or None if a callable or None is passed in.
9941012 """
9951013 msg = ("'usecols' must either be all strings, all unicode, "
9961014 "all integers or a callable" )
9971015
9981016 if usecols is not None :
9991017 if callable (usecols ):
1000- return usecols
1018+ return usecols , None
10011019 usecols_dtype = lib .infer_dtype (usecols )
10021020 if usecols_dtype not in ('empty' , 'integer' ,
10031021 'string' , 'unicode' ):
10041022 raise ValueError (msg )
10051023
1006- return set (usecols )
1007- return usecols
1024+ return set (usecols ), usecols_dtype
1025+ return usecols , None
10081026
10091027
10101028def _validate_parse_dates_arg (parse_dates ):
@@ -1473,7 +1491,8 @@ def __init__(self, src, **kwds):
14731491 self ._reader = _parser .TextReader (src , ** kwds )
14741492
14751493 # XXX
1476- self .usecols = _validate_usecols_arg (self ._reader .usecols )
1494+ self .usecols , self .usecols_dtype = _validate_usecols_arg (
1495+ self ._reader .usecols )
14771496
14781497 passed_names = self .names is None
14791498
@@ -1549,12 +1568,29 @@ def close(self):
15491568 pass
15501569
15511570 def _set_noconvert_columns (self ):
1571+ """
1572+ Set the columns that should not undergo dtype conversions.
1573+
1574+ Currently, any column that is involved with date parsing will not
1575+ undergo such conversions.
1576+ """
15521577 names = self .orig_names
1553- usecols = self .usecols
1578+ if self .usecols_dtype == 'integer' :
1579+ # Assumes a set of integers converts to a list in ascending
1580+ # order; Python does not guarantee set iteration order.
1581+ usecols = list (self .usecols )
1582+ elif (callable (self .usecols ) or
1583+ self .usecols_dtype not in ('empty' , None )):
1584+ # The names attribute should have the correct columns
1585+ # in the proper order for indexing with parse_dates.
1586+ usecols = self .names [:]
1587+ else :
1588+ # Usecols is empty.
1589+ usecols = None
15541590
15551591 def _set (x ):
1556- if usecols and is_integer (x ):
1557- x = list ( usecols ) [x ]
1592+ if usecols is not None and is_integer (x ):
1593+ x = usecols [x ]
15581594
15591595 if not is_integer (x ):
15601596 x = names .index (x )
@@ -1792,7 +1828,7 @@ def __init__(self, f, **kwds):
17921828 self .skipinitialspace = kwds ['skipinitialspace' ]
17931829 self .lineterminator = kwds ['lineterminator' ]
17941830 self .quoting = kwds ['quoting' ]
1795- self .usecols = _validate_usecols_arg (kwds ['usecols' ])
1831+ self .usecols , _ = _validate_usecols_arg (kwds ['usecols' ])
17961832 self .skip_blank_lines = kwds ['skip_blank_lines' ]
17971833
17981834 self .names_passed = kwds ['names' ] or None
0 commit comments