@@ -801,7 +801,6 @@ cdef class TextReader:
801801 raise StopIteration
802802 self ._end_clock(' Tokenization' )
803803
804-
805804 self ._start_clock()
806805 columns = self ._convert_column_data(rows = rows,
807806 footer = footer,
@@ -840,11 +839,12 @@ cdef class TextReader:
840839
841840 def _convert_column_data (self , rows = None , upcast_na = False , footer = 0 ):
842841 cdef:
843- Py_ssize_t i, nused, ncols
842+ Py_ssize_t i, nused
844843 kh_str_t * na_hashset = NULL
845844 int start, end
846845 object name, na_flist
847846 bint na_filter = 0
847+ Py_ssize_t num_cols
848848
849849 start = self .parser_start
850850
@@ -857,6 +857,22 @@ cdef class TextReader:
857857 # if footer > 0:
858858 # end -= footer
859859
860+ # print >> sys.stderr, self.table_width
861+ # print >> sys.stderr, self.leading_cols
862+ # print >> sys.stderr, self.parser.lines
863+ # print >> sys.stderr, start
864+ # print >> sys.stderr, end
865+ # print >> sys.stderr, self.header
866+ # print >> sys.stderr, "index"
867+ num_cols = - 1
868+ for i in range (self .parser.lines):
869+ num_cols = (num_cols < self .parser.line_fields[i]) * self .parser.line_fields[i] + \
870+ (num_cols >= self .parser.line_fields[i]) * num_cols
871+
872+ if self .table_width - self .leading_cols > num_cols:
873+ raise CParserError(" Too many columns specified: expected %s and found %s " %
874+ (self .table_width - self .leading_cols, num_cols))
875+
860876 results = {}
861877 nused = 0
862878 for i in range (self .table_width):
@@ -1446,7 +1462,6 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
14461462 if na_filter:
14471463 for i in range (lines):
14481464 word = COLITER_NEXT(it)
1449-
14501465 k = kh_get_str(na_hashset, word)
14511466 # in the hash table
14521467 if k != na_hashset.n_buckets:
@@ -1828,16 +1843,6 @@ cdef _apply_converter(object f, parser_t *parser, int col,
18281843
18291844 return lib.maybe_convert_objects(result)
18301845
1831- # if issubclass(values.dtype.type, (np.number, np.bool_)):
1832- # return values
1833-
1834- # # XXX
1835- # na_values = set([''])
1836- # try:
1837- # return lib.maybe_convert_numeric(values, na_values, False)
1838- # except Exception:
1839- # na_count = lib.sanitize_objects(values, na_values, False)
1840- # return result
18411846
18421847def _to_structured_array (dict columns , object names ):
18431848 cdef:
0 commit comments