@@ -7,11 +7,19 @@ from io import StringIO
77
88from libc.string cimport strchr
99
10+ import cython
11+
12+ from cpython cimport PyObject_Str, PyUnicode_Join
13+
1014from cpython.datetime cimport datetime, datetime_new, import_datetime
1115from cpython.version cimport PY_VERSION_HEX
1216import_datetime()
1317
1418import numpy as np
19+ cimport numpy as cnp
20+ from numpy cimport (PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT,
21+ PyArray_IterNew, flatiter, float64_t)
22+ cnp.import_array()
1523
1624# dateutil compat
1725from dateutil.tz import (tzoffset,
@@ -26,7 +34,7 @@ from pandas._config import get_option
2634
2735from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
2836from pandas._libs.tslibs.nattype import nat_strings, NaT
29- from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
37+ from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size
3038
3139cdef extern from " ../src/headers/portable.h" :
3240 int getdigit_ascii(char c, int default) nogil
@@ -880,3 +888,117 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
880888 return guessed_format
881889 else :
882890 return None
891+
892+
@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline object convert_to_unicode(object item,
                                      bint keep_trivial_numbers):
    """
    Convert `item` to str.

    Parameters
    ----------
    item : object
        Value to convert; instances of str are returned unchanged.
    keep_trivial_numbers : bool
        if True, then an integer zero, a float zero or a float NaN
        is returned as is instead of being converted to str

    Returns
    -------
    str or int or float
    """
    cdef:
        float64_t float_item

    if keep_trivial_numbers:
        if isinstance(item, int):
            # Compare as a Python object: converting to a C int
            # (`<int>item`) raises OverflowError for integers that do
            # not fit into a C int.
            if item == 0:
                return item
        elif isinstance(item, float):
            float_item = item
            # `x != x` holds only for NaN
            if float_item == 0.0 or float_item != float_item:
                return item

    if not isinstance(item, str):
        item = PyObject_Str(item)

    return item
927+
928+
@cython.wraparound(False)
@cython.boundscheck(False)
def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
    """
    Join the elements of the numpy arrays in `date_cols` row by row.

    With a single array every element is passed through
    `convert_to_unicode`; with several arrays the elements found at the
    same position are converted to str and joined with a single space.

    Parameters
    ----------
    date_cols : tuple of numpy arrays
    keep_trivial_numbers : bool, default True
        Only used when len(date_cols) == 1: forwarded to
        `convert_to_unicode` for every element. With several arrays
        every element is always converted to str.

    Returns
    -------
    arr_of_rows : ndarray (dtype=object)
        Its length is the smallest `len()` among the input arrays;
        empty when `date_cols` is empty.

    Raises
    ------
    ValueError
        If an element of `date_cols` is not a numpy array.

    Examples
    --------
    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
    >>> times=np.array(['11:20', '10:45'], dtype=object)
    >>> result = _concat_date_cols((dates, times))
    >>> result
    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
    """
    cdef:
        Py_ssize_t n_cols = len(date_cols), n_rows = 0
        Py_ssize_t i, j
        list parts
        cnp.ndarray[object] col_iters
        object[::1] col_iters_view
        flatiter cursor
        cnp.ndarray[object] out
        object[:] out_view
        object column, value

    # nothing to concatenate
    if n_cols == 0:
        return np.zeros(0, dtype=object)

    # reject the input as soon as a non-array element is met
    for column in date_cols:
        if not is_array(column):
            raise ValueError(" not all elements from date_cols are numpy arrays")

    # only as many rows as the shortest column provides
    n_rows = min(map(len, date_cols))
    out = np.zeros(n_rows, dtype=object)
    out_view = out

    if n_cols == 1:
        # single column: convert element by element, no joining needed
        column = date_cols[0]
        cursor = <flatiter>PyArray_IterNew(column)
        for i in range(n_rows):
            value = PyArray_GETITEM(column, PyArray_ITER_DATA(cursor))
            out_view[i] = convert_to_unicode(value, keep_trivial_numbers)
            PyArray_ITER_NEXT(cursor)
        return out

    # fixed-size list that is reused for every row - more efficient
    # memory allocation
    parts = [None] * n_cols

    # one flat iterator per column, kept in an object ndarray that is
    # accessed through a memoryview - more efficient indexing
    col_iters = np.zeros(n_cols, dtype=object)
    col_iters_view = col_iters
    for j in range(n_cols):
        col_iters_view[j] = PyArray_IterNew(date_cols[j])

    # elements that sit on the same row are merged into one string
    for i in range(n_rows):
        for j in range(n_cols):
            # the cast is required because the iterators are stored as
            # plain objects inside the ndarray
            cursor = <flatiter>col_iters_view[j]
            value = PyArray_GETITEM(date_cols[j], PyArray_ITER_DATA(cursor))
            parts[j] = convert_to_unicode(value, False)
            PyArray_ITER_NEXT(cursor)
        out_view[i] = PyUnicode_Join(' ', parts)

    return out
0 commit comments