@@ -1087,6 +1087,44 @@ def string_array_replace_from_nan_rep(
10871087 return arr
10881088
10891089
@cython.boundscheck(False)
@cython.wraparound(False)
def convert_json_to_lines(object arr):
    """
    Replace the commas that separate top-level JSON objects with line feeds.

    The text is encoded to UTF-8 and scanned one byte at a time.  A comma is
    rewritten to a newline only when it sits outside every ``{...}`` pair and
    outside a double-quoted string; every other byte is left untouched.

    Parameters
    ----------
    arr : object
        Text to convert.  It must provide ``encode('utf-8')`` (i.e. behave
        like a unicode string), e.g. ``'{"a":1},{"b":2}'``.

    Returns
    -------
    str
        The same text with each separating comma replaced by ``'\\n'``.

    Notes
    -----
    Only ``{``/``}`` are tracked for nesting; ``[``/``]`` are not.  All bytes
    that are compared against are ASCII, so multi-byte UTF-8 sequences (whose
    bytes are all >= 0x80) can never be mistaken for a delimiter.
    """
    cdef:
        Py_ssize_t i, length, num_open_brackets_seen = 0
        bint in_quotes = False, is_escaping = False
        ndarray[uint8_t] narr
        unsigned char v, comma, left_bracket, right_bracket, newline
        unsigned char quote, backslash

    newline = ord('\n')
    comma = ord(',')
    left_bracket = ord('{')
    right_bracket = ord('}')
    quote = ord('"')
    backslash = ord('\\')

    # frombuffer returns a read-only view over the bytes object; copy so the
    # separators can be overwritten in place.
    narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy()
    length = narr.shape[0]

    for i in range(length):
        v = narr[i]

        # Track escapes as state rather than peeking at narr[i - 1]: that way
        # an escaped backslash ("\\\\") does not make the following quote look
        # escaped, and a quote in position 0 is still recognised.
        if is_escaping:
            # v is the target of the preceding backslash: never a delimiter
            is_escaping = False
        elif v == backslash:
            is_escaping = True
        elif v == quote:
            in_quotes = not in_quotes

        if in_quotes:
            # nothing inside a string literal is structural
            continue

        if v == comma:  # commas that should be \n
            if num_open_brackets_seen == 0:
                narr[i] = newline
        elif v == left_bracket:
            num_open_brackets_seen += 1
        elif v == right_bracket:
            num_open_brackets_seen -= 1

    # tobytes() is the supported spelling of the deprecated tostring() alias
    return narr.tobytes().decode('utf-8')
1126+
1127+
10901128@ cython.boundscheck (False )
10911129@ cython.wraparound (False )
10921130def write_csv_rows (list data , ndarray data_index ,
0 commit comments