@@ -35,23 +35,6 @@ cdef extern from "Python.h":
3535
3636cdef size_t _INIT_VEC_CAP = 32
3737
38- def list_to_object_array (list obj ):
39- '''
40- Convert list to object ndarray. Seriously can't believe I had to write this
41- function
42- '''
43- cdef:
44- Py_ssize_t i, n
45- ndarray[object ] arr
46-
47- n = len (obj)
48- arr = np.empty(n, dtype = object )
49-
50- for i from 0 <= i < n:
51- arr[i] = obj[i]
52-
53- return arr
54-
5538cdef class Vector:
5639 pass
5740
@@ -68,6 +51,9 @@ cdef class ObjectVector(Vector):
6851 self .ao = np.empty(_INIT_VEC_CAP, dtype = object )
6952 self .data = < PyObject** > self .ao.data
7053
54+ def __len__ (self ):
55+ return self .n
56+
7157 cdef inline append(self , object o):
7258 if self .n == self .m:
7359 self .m = max (self .m * 2 , _INIT_VEC_CAP)
@@ -132,37 +118,53 @@ cdef class Int64Vector:
132118
133119 Int64VectorData_append(self .data, x)
134120
121+ ctypedef struct Float64VectorData:
122+ float64_t * data
123+ size_t n, m
124+
125+ cdef uint8_t Float64VectorData_needs_resize(Float64VectorData * data) nogil:
126+ return data.n == data.m
127+
128+ cdef void Float64VectorData_append(Float64VectorData * data, float64_t x) nogil:
129+
130+ data.data[data.n] = x
131+ data.n += 1
132+
135133cdef class Float64Vector(Vector):
136134
137135 cdef:
138- float64_t * data
139- size_t n, m
136+ Float64VectorData * data
140137 ndarray ao
141138
142139 def __cinit__ (self ):
143- self .n = 0
144- self .m = _INIT_VEC_CAP
145- self .ao = np.empty(_INIT_VEC_CAP, dtype = np.float64)
146- self .data = < float64_t* > self .ao.data
140+ self .data = < Float64VectorData * > PyMem_Malloc(sizeof(Float64VectorData))
141+ self .data.n = 0
142+ self .data.m = _INIT_VEC_CAP
143+ self .ao = np.empty(self .data.m, dtype = np.float64)
144+ self .data.data = < float64_t* > self .ao.data
147145
148146 cdef resize(self ):
149- self .m = max (self .m * 2 , _INIT_VEC_CAP)
150- self .ao.resize(self .m)
151- self .data = < float64_t* > self .ao.data
147+ self .data. m = max (self .data. m * 4 , _INIT_VEC_CAP)
148+ self .ao.resize(self .data. m)
149+ self .data.data = < float64_t* > self .ao.data
152150
153- cdef inline void append(self , float64_t x) nogil:
154- if self .n == self .m:
155- with gil:
156- self .resize()
151+ def __dealloc__ (self ):
152+ PyMem_Free(self .data)
157153
158- self .data[ self .n] = x
159- self .n += 1
154+ def __len__ ( self ):
155+ return self .data.n
160156
161157 def to_array (self ):
162- self .ao.resize(self .n)
163- self .m = self .n
158+ self .ao.resize(self .data. n)
159+ self .data. m = self .data .n
164160 return self .ao
165161
162+ cdef inline void append(self , float64_t x):
163+
164+ if Float64VectorData_needs_resize(self .data):
165+ self .resize()
166+
167+ Float64VectorData_append(self .data, x)
166168
167169cdef class HashTable:
168170 pass
@@ -459,13 +461,21 @@ cdef class Int64HashTable(HashTable):
459461 int64_t val
460462 khiter_t k
461463 Int64Vector uniques = Int64Vector()
464+ Int64VectorData * ud
462465
463- for i in range (n):
464- val = values[i]
465- k = kh_get_int64(self .table, val)
466- if k == self .table.n_buckets:
467- kh_put_int64(self .table, val, & ret)
468- uniques.append(val)
466+ ud = uniques.data
467+
468+ with nogil:
469+ for i in range (n):
470+ val = values[i]
471+ k = kh_get_int64(self .table, val)
472+ if k == self .table.n_buckets:
473+ kh_put_int64(self .table, val, & ret)
474+
475+ if Int64VectorData_needs_resize(ud):
476+ with gil:
477+ uniques.resize()
478+ Int64VectorData_append(ud, val)
469479
470480 result = uniques.to_array()
471481
@@ -526,26 +536,33 @@ cdef class Float64HashTable(HashTable):
526536 int ret = 0
527537 float64_t val
528538 khiter_t k
539+ Float64VectorData * ud
529540
530541 labels = np.empty(n, dtype = np.int64)
542+ ud = uniques.data
531543
532- for i in range (n):
533- val = values[i]
544+ with nogil:
545+ for i in range (n):
546+ val = values[i]
534547
535- if val != val:
536- labels[i] = na_sentinel
537- continue
548+ if val != val:
549+ labels[i] = na_sentinel
550+ continue
538551
539- k = kh_get_float64(self .table, val)
540- if k != self .table.n_buckets:
541- idx = self .table.vals[k]
542- labels[i] = idx
543- else :
544- k = kh_put_float64(self .table, val, & ret)
545- self .table.vals[k] = count
546- uniques.append(val)
547- labels[i] = count
548- count += 1
552+ k = kh_get_float64(self .table, val)
553+ if k != self .table.n_buckets:
554+ idx = self .table.vals[k]
555+ labels[i] = idx
556+ else :
557+ k = kh_put_float64(self .table, val, & ret)
558+ self .table.vals[k] = count
559+
560+ if Float64VectorData_needs_resize(ud):
561+ with gil:
562+ uniques.resize()
563+ Float64VectorData_append(ud, val)
564+ labels[i] = count
565+ count += 1
549566
550567 return labels
551568
@@ -588,20 +605,33 @@ cdef class Float64HashTable(HashTable):
588605 int ret = 0
589606 float64_t val
590607 khiter_t k
591- Float64Vector uniques = Float64Vector()
592608 bint seen_na = 0
609+ Float64Vector uniques = Float64Vector()
610+ Float64VectorData * ud
593611
594- for i in range (n):
595- val = values[i]
612+ ud = uniques.data
596613
597- if val == val:
598- k = kh_get_float64(self .table, val)
599- if k == self .table.n_buckets:
600- kh_put_float64(self .table, val, & ret)
601- uniques.append(val)
602- elif not seen_na:
603- seen_na = 1
604- uniques.append(NAN)
614+ with nogil:
615+ for i in range (n):
616+ val = values[i]
617+
618+ if val == val:
619+ k = kh_get_float64(self .table, val)
620+ if k == self .table.n_buckets:
621+ kh_put_float64(self .table, val, & ret)
622+
623+ if Float64VectorData_needs_resize(ud):
624+ with gil:
625+ uniques.resize()
626+ Float64VectorData_append(ud, val)
627+
628+ elif not seen_na:
629+ seen_na = 1
630+
631+ if Float64VectorData_needs_resize(ud):
632+ with gil:
633+ uniques.resize()
634+ Float64VectorData_append(ud, NAN)
605635
606636 return uniques.to_array()
607637
0 commit comments