@@ -85,9 +85,9 @@ cdef class {{name}}Vector:
8585 self.ao = np.empty(self.data.m, dtype={{idtype}})
8686 self.data.data = <{{arg}}*> self.ao.data
8787
cdef resize(self, refcheck=True):
    # Grow the backing ndarray and re-point the raw data pointer at it.
    #
    # The capacity ``self.data.m`` is quadrupled, with ``_INIT_VEC_CAP`` as
    # a floor. ``refcheck`` is handed straight to ``ndarray.resize``.
    self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
    self.ao.resize(self.data.m, refcheck=refcheck)
    # ndarray.resize may reallocate, so refresh the cached C pointer
    self.data.data = <{{arg}}*> self.ao.data
9292
9393 def __dealloc__(self):
@@ -98,15 +98,15 @@ cdef class {{name}}Vector:
def __len__(self):
    # the length is the ``n`` counter kept on the underlying data struct
    return self.data.n
100100
cpdef to_array(self, refcheck=True):
    """
    Resize the backing ndarray to ``self.data.n`` elements and return it.

    ``refcheck`` is forwarded to ``ndarray.resize``. The recorded capacity
    ``self.data.m`` is set to ``self.data.n`` to match the new array size.
    """
    self.ao.resize(self.data.n, refcheck=refcheck)
    self.data.m = self.data.n
    return self.ao
105105
cdef inline void append(self, {{arg}} x, refcheck=True):
    # Append ``x``, growing the buffer first whenever ``needs_resize``
    # reports that it is required; ``refcheck`` is passed on to ``resize``.
    if needs_resize(self.data):
        self.resize(refcheck=refcheck)

    append_data_{{dtype}}(self.data, x)
112112
@@ -130,11 +130,12 @@ cdef class StringVector:
130130 self.data.m = _INIT_VEC_CAP
131131 self.data.data = <char **> malloc(self.data.m * sizeof(char *))
132132
133- cdef resize(self):
133+ cdef resize(self, refcheck=True ):
134134 cdef:
135135 char **orig_data
136136 size_t i, m
137137
138+ # refcheck ignored, for compatibility only
138139 m = self.data.m
139140 self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
140141
@@ -154,23 +155,24 @@ cdef class StringVector:
def __len__(self):
    # the length is the ``n`` counter kept on the underlying data struct
    return self.data.n
156157
def to_array(self, refcheck=True):
    """
    Copy the stored C strings into a new object-dtype ndarray.

    Parameters
    ----------
    refcheck : bool, default True
        Unused; accepted only so the signature matches the other
        vector classes.

    Returns
    -------
    ndarray
        Object-dtype array holding ``self.data.n`` elements.
    """
    cdef:
        ndarray ao
        size_t i, n
        object val

    n = self.data.n
    # use the builtin ``object``: the ``np.object`` alias is deprecated and
    # no longer exists in newer NumPy releases
    ao = np.empty(n, dtype=object)
    for i in range(n):
        # char* is converted to a Python object on assignment
        val = self.data.data[i]
        ao[i] = val
    self.data.m = n
    return ao
169171
cdef inline void append(self, char * x, refcheck=True):
    # Append the C string ``x``, growing the buffer first whenever
    # ``needs_resize`` reports that it is required; ``refcheck`` is passed
    # on to ``resize``.
    if needs_resize(self.data):
        self.resize(refcheck=refcheck)

    append_data_string(self.data, x)
176178
@@ -191,18 +193,18 @@ cdef class ObjectVector:
def __len__(self):
    # ``n`` is incremented once per append, so it is the element count
    return self.n
193195
cdef inline append(self, object o, refcheck=True):
    # Store a new reference to ``o`` in the next free slot.
    #
    # When the buffer is full (``n == m``) the capacity is doubled, with
    # ``_INIT_VEC_CAP`` as a floor, and ``refcheck`` is forwarded to
    # ``ndarray.resize``.
    if self.n == self.m:
        self.m = max(self.m * 2, _INIT_VEC_CAP)
        self.ao.resize(self.m, refcheck=refcheck)
        # ndarray.resize may reallocate, so refresh the cached C pointer
        self.data = <PyObject**> self.ao.data

    # the slot is written through a raw PyObject* pointer, so the
    # reference has to be taken by hand
    Py_INCREF(o)
    self.data[self.n] = <PyObject*> o
    self.n += 1
203205
def to_array(self, refcheck=True):
    """
    Resize the backing ndarray to ``self.n`` elements and return it.

    ``refcheck`` is forwarded to ``ndarray.resize``. The recorded capacity
    ``self.m`` is set to ``self.n`` to match the new array size.
    """
    self.ao.resize(self.n, refcheck=refcheck)
    self.m = self.n
    return self.ao
208210
@@ -324,13 +326,13 @@ cdef class {{name}}HashTable(HashTable):
324326
def factorize(self, {{dtype}}_t values):
    """
    Return ``(uniques, labels)`` for ``values``.

    A fresh ``{{name}}Vector`` is filled by ``get_labels`` (called with
    ``count_prior=0`` and ``na_sentinel=0``) and then converted to an
    ndarray. The vector is created locally in this method, and
    ``refcheck=False`` is passed to both calls.
    """
    uniques = {{name}}Vector()
    labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
    return uniques.to_array(refcheck=False), labels
329331
330332 @cython.boundscheck(False)
331333 def get_labels(self, {{dtype}}_t[:] values, {{name}}Vector uniques,
332334 Py_ssize_t count_prior, Py_ssize_t na_sentinel,
333- bint check_null=True):
335+ bint check_null=True, bint refcheck=True ):
334336 cdef:
335337 Py_ssize_t i, n = len(values)
336338 int64_t[:] labels
@@ -362,7 +364,7 @@ cdef class {{name}}HashTable(HashTable):
362364
363365 if needs_resize(ud):
364366 with gil:
365- uniques.resize()
367+ uniques.resize(refcheck=refcheck )
366368 append_data_{{dtype}}(ud, val)
367369 labels[i] = count
368370 count += 1
@@ -405,12 +407,12 @@ cdef class {{name}}HashTable(HashTable):
405407
406408 if needs_resize(ud):
407409 with gil:
408- uniques.resize()
410+ uniques.resize(refcheck=False )
409411 append_data_{{dtype}}(ud, val)
410412 labels[i] = count
411413 count += 1
412414
413- arr_uniques = uniques.to_array()
415+ arr_uniques = uniques.to_array(refcheck=False )
414416
415417 return np.asarray(labels), arr_uniques
416418
@@ -438,25 +440,25 @@ cdef class {{name}}HashTable(HashTable):
438440 kh_put_{{dtype}}(self.table, val, &ret)
439441 if needs_resize(ud):
440442 with gil:
441- uniques.resize()
443+ uniques.resize(refcheck=False )
442444 append_data_{{dtype}}(ud, val)
443445 elif not seen_na:
444446 seen_na = 1
445447 if needs_resize(ud):
446448 with gil:
447- uniques.resize()
449+ uniques.resize(refcheck=False )
448450 append_data_{{dtype}}(ud, NAN)
449451 {{else}}
450452 k = kh_get_{{dtype}}(self.table, val)
451453 if k == self.table.n_buckets:
452454 kh_put_{{dtype}}(self.table, val, &ret)
453455 if needs_resize(ud):
454456 with gil:
455- uniques.resize()
457+ uniques.resize(refcheck=False )
456458 append_data_{{dtype}}(ud, val)
457459 {{endif}}
458460
459- return uniques.to_array()
461+ return uniques.to_array(refcheck=False )
460462
461463{{endfor}}
462464
@@ -571,12 +573,12 @@ cdef class StringHashTable(HashTable):
571573 uniques = ObjectVector()
572574 for i in range(count):
573575 uniques.append(values[uindexer[i]])
574- return uniques.to_array()
576+ return uniques.to_array(refcheck=False )
575577
def factorize(self, ndarray[object] values):
    """
    Return ``(uniques, labels)`` for ``values``.

    A fresh ``ObjectVector`` is filled by ``get_labels`` (called with
    ``count_prior=0`` and ``na_sentinel=0``) and then converted to an
    ndarray. The vector is created locally in this method, and
    ``refcheck=False`` is passed to both calls.
    """
    uniques = ObjectVector()
    # spelled ``False`` (not ``0``) to match every other refcheck call site
    labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
    return uniques.to_array(refcheck=False), labels
580582
581583 @cython.boundscheck(False)
582584 def lookup(self, ndarray[object] values):
@@ -642,7 +644,7 @@ cdef class StringHashTable(HashTable):
642644 @cython.boundscheck(False)
643645 def get_labels(self, ndarray[object] values, ObjectVector uniques,
644646 Py_ssize_t count_prior, int64_t na_sentinel,
645- bint check_null=1):
647+ bint check_null=1, bint refcheck=1 ):
646648 cdef:
647649 Py_ssize_t i, n = len(values)
648650 int64_t[:] labels
@@ -654,6 +656,7 @@ cdef class StringHashTable(HashTable):
654656 char **vecs
655657 khiter_t k
656658
659+ # refcheck is for compatibility
657660 # these by-definition *must* be strings
658661 labels = np.zeros(n, dtype=np.int64)
659662 uindexer = np.empty(n, dtype=np.int64)
@@ -811,11 +814,11 @@ cdef class PyObjectHashTable(HashTable):
811814 seen_na = 1
812815 uniques.append(nan)
813816
814- return uniques.to_array()
817+ return uniques.to_array(refcheck=False )
815818
816819 def get_labels(self, ndarray[object] values, ObjectVector uniques,
817820 Py_ssize_t count_prior, int64_t na_sentinel,
818- bint check_null=True):
821+ bint check_null=True, bint refcheck=True ):
819822 cdef:
820823 Py_ssize_t i, n = len(values)
821824 int64_t[:] labels
@@ -968,5 +971,5 @@ cdef class MultiIndexHashTable(HashTable):
968971
def get_labels(self, object mi, ObjectVector uniques,
               Py_ssize_t count_prior, int64_t na_sentinel,
               bint check_null=True, bint refcheck=True):
    # Not implemented for this table: every call raises, and none of the
    # arguments are used.
    raise NotImplementedError
0 commit comments