@@ -7,9 +7,8 @@ import warnings
77
88from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE
99
10- from libc.stdio cimport fopen, fclose
11- from libc.stdlib cimport malloc, free
12- from libc.string cimport strncpy, strlen, strcmp, strcasecmp
10+ from libc.stdlib cimport free
11+ from libc.string cimport strncpy, strlen, strcasecmp
1312
1413cimport cython
1514from cython cimport Py_ssize_t
@@ -27,9 +26,6 @@ cdef extern from "Python.h":
2726 object PyUnicode_Decode(char * v, Py_ssize_t size, char * encoding,
2827 char * errors)
2928
30- cdef extern from " stdlib.h" :
31- void memcpy(void * dst, void * src, size_t n)
32-
3329
3430import numpy as np
3531cimport numpy as cnp
@@ -50,7 +46,7 @@ from khash cimport (
5046
5147import pandas.compat as compat
5248from pandas.core.dtypes.common import (
53- is_categorical_dtype, CategoricalDtype,
49+ is_categorical_dtype,
5450 is_integer_dtype, is_float_dtype,
5551 is_bool_dtype, is_object_dtype,
5652 is_datetime64_dtype,
9086except NameError :
9187 basestring = str
9288
93- cdef extern from " src/numpy_helper.h" :
94- void transfer_object_column(char * dst, char * src, size_t stride,
95- size_t length)
9689
9790cdef extern from " parser/tokenizer.h" :
9891
@@ -232,8 +225,6 @@ cdef extern from "parser/tokenizer.h":
232225
233226 int parser_trim_buffers(parser_t * self )
234227
235- void debug_print_parser(parser_t * self )
236-
237228 int tokenize_all_rows(parser_t * self ) nogil
238229 int tokenize_nrows(parser_t * self , size_t nrows) nogil
239230
@@ -249,7 +240,6 @@ cdef extern from "parser/tokenizer.h":
249240 double round_trip(const char * p, char ** q, char decimal, char sci,
250241 char tsep, int skip_trailing) nogil
251242
252- int to_longlong(char * item, long long * p_value) nogil
253243 int to_boolean(const char * item, uint8_t * val) nogil
254244
255245
@@ -875,9 +865,6 @@ cdef class TextReader:
875865
876866 return header, field_count
877867
878- cdef _implicit_index_count(self ):
879- pass
880-
881868 def read (self , rows = None ):
882869 """
883870 rows=None --> read all rows
@@ -997,9 +984,6 @@ cdef class TextReader:
997984
998985 return columns
999986
1000- def debug_print (self ):
1001- debug_print_parser(self .parser)
1002-
1003987 cdef _start_clock(self ):
1004988 self .clocks.append(time.time())
1005989
@@ -1346,6 +1330,7 @@ cdef class TextReader:
13461330 else :
13471331 return None
13481332
1333+
13491334cdef object _true_values = [b' True' , b' TRUE' , b' true' ]
13501335cdef object _false_values = [b' False' , b' FALSE' , b' false' ]
13511336
@@ -1375,21 +1360,6 @@ cdef asbytes(object o):
13751360_NA_VALUES = _ensure_encoded(list (com._NA_VALUES))
13761361
13771362
1378- def _is_file_like (obj ):
1379- if PY3:
1380- import io
1381- if isinstance (obj, io.TextIOWrapper):
1382- raise ParserError(' Cannot handle open unicode files (yet)' )
1383-
1384- # BufferedReader is a byte reader for Python 3
1385- file = io.BufferedReader
1386- else :
1387- import __builtin__
1388- file = __builtin__.file
1389-
1390- return isinstance (obj, (basestring , file ))
1391-
1392-
13931363def _maybe_upcast (arr ):
13941364 """
13951365
@@ -1479,6 +1449,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col,
14791449
14801450 return result, na_count
14811451
1452+
14821453cdef _string_box_utf8(parser_t * parser, int64_t col,
14831454 int64_t line_start, int64_t line_end,
14841455 bint na_filter, kh_str_t * na_hashset):
@@ -1532,6 +1503,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
15321503
15331504 return result, na_count
15341505
1506+
15351507cdef _string_box_decode(parser_t * parser, int64_t col,
15361508 int64_t line_start, int64_t line_end,
15371509 bint na_filter, kh_str_t * na_hashset,
@@ -1662,6 +1634,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
16621634 kh_destroy_str(table)
16631635 return np.asarray(codes), result, na_count
16641636
1637+
16651638cdef _to_fw_string(parser_t * parser, int64_t col, int64_t line_start,
16661639 int64_t line_end, int64_t width):
16671640 cdef:
@@ -1679,6 +1652,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
16791652
16801653 return result
16811654
1655+
16821656cdef inline void _to_fw_string_nogil(parser_t * parser, int64_t col,
16831657 int64_t line_start, int64_t line_end,
16841658 size_t width, char * data) nogil:
@@ -1694,10 +1668,12 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
16941668 strncpy(data, word, width)
16951669 data += width
16961670
1671+
16971672cdef char * cinf = b' inf'
16981673cdef char * cposinf = b' +inf'
16991674cdef char * cneginf = b' -inf'
17001675
1676+
17011677cdef _try_double(parser_t * parser, int64_t col,
17021678 int64_t line_start, int64_t line_end,
17031679 bint na_filter, kh_str_t * na_hashset, object na_flist):
@@ -1738,6 +1714,7 @@ cdef _try_double(parser_t *parser, int64_t col,
17381714 return None , None
17391715 return result, na_count
17401716
1717+
17411718cdef inline int _try_double_nogil(parser_t * parser,
17421719 double (* double_converter)(
17431720 const char * , char ** , char ,
@@ -1808,6 +1785,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
18081785
18091786 return 0
18101787
1788+
18111789cdef _try_uint64(parser_t * parser, int64_t col,
18121790 int64_t line_start, int64_t line_end,
18131791 bint na_filter, kh_str_t * na_hashset):
@@ -1843,6 +1821,7 @@ cdef _try_uint64(parser_t *parser, int64_t col,
18431821
18441822 return result
18451823
1824+
18461825cdef inline int _try_uint64_nogil(parser_t * parser, int64_t col,
18471826 int64_t line_start,
18481827 int64_t line_end, bint na_filter,
@@ -1881,6 +1860,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
18811860
18821861 return 0
18831862
1863+
18841864cdef _try_int64(parser_t * parser, int64_t col,
18851865 int64_t line_start, int64_t line_end,
18861866 bint na_filter, kh_str_t * na_hashset):
@@ -1909,6 +1889,7 @@ cdef _try_int64(parser_t *parser, int64_t col,
19091889
19101890 return result, na_count
19111891
1892+
19121893cdef inline int _try_int64_nogil(parser_t * parser, int64_t col,
19131894 int64_t line_start,
19141895 int64_t line_end, bint na_filter,
@@ -1948,69 +1929,6 @@ cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
19481929
19491930 return 0
19501931
1951- cdef _try_bool(parser_t * parser, int64_t col,
1952- int64_t line_start, int64_t line_end,
1953- bint na_filter, kh_str_t * na_hashset):
1954- cdef:
1955- int na_count
1956- Py_ssize_t lines = line_end - line_start
1957- uint8_t * data
1958- cnp.ndarray[cnp.uint8_t, ndim= 1 ] result
1959-
1960- uint8_t NA = na_values[np.bool_]
1961-
1962- result = np.empty(lines)
1963- data = < uint8_t * > result.data
1964-
1965- with nogil:
1966- error = _try_bool_nogil(parser, col, line_start,
1967- line_end, na_filter,
1968- na_hashset, NA, data,
1969- & na_count)
1970- if error != 0 :
1971- return None , None
1972- return result.view(np.bool_), na_count
1973-
1974- cdef inline int _try_bool_nogil(parser_t * parser, int64_t col,
1975- int64_t line_start,
1976- int64_t line_end, bint na_filter,
1977- const kh_str_t * na_hashset, uint8_t NA,
1978- uint8_t * data, int * na_count) nogil:
1979- cdef:
1980- int error
1981- Py_ssize_t i, lines = line_end - line_start
1982- coliter_t it
1983- const char * word = NULL
1984- khiter_t k
1985- na_count[0 ] = 0
1986-
1987- coliter_setup(& it, parser, col, line_start)
1988-
1989- if na_filter:
1990- for i in range (lines):
1991- COLITER_NEXT(it, word)
1992-
1993- k = kh_get_str(na_hashset, word)
1994- # in the hash table
1995- if k != na_hashset.n_buckets:
1996- na_count[0 ] += 1
1997- data[0 ] = NA
1998- data += 1
1999- continue
2000-
2001- error = to_boolean(word, data)
2002- if error != 0 :
2003- return error
2004- data += 1
2005- else :
2006- for i in range (lines):
2007- COLITER_NEXT(it, word)
2008-
2009- error = to_boolean(word, data)
2010- if error != 0 :
2011- return error
2012- data += 1
2013- return 0
20141932
20151933cdef _try_bool_flex(parser_t * parser, int64_t col,
20161934 int64_t line_start, int64_t line_end,
@@ -2039,6 +1957,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col,
20391957 return None , None
20401958 return result.view(np.bool_), na_count
20411959
1960+
20421961cdef inline int _try_bool_flex_nogil(parser_t * parser, int64_t col,
20431962 int64_t line_start,
20441963 int64_t line_end, bint na_filter,
@@ -2131,6 +2050,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
21312050
21322051 return table
21332052
2053+
21342054cdef kh_float64_t* kset_float64_from_list(values) except NULL :
21352055 # caller takes responsibility for freeing the hash table
21362056 cdef:
0 commit comments