@@ -3,17 +3,15 @@ Parsing functions for datetime and datetime-like strings.
33"""
44import re
55import time
6- from io import StringIO
76
87from libc.string cimport strchr
98
109import cython
1110from cython import Py_ssize_t
1211
1312from cpython.object cimport PyObject_Str
14- from cpython.unicode cimport PyUnicode_Join
1513
16- from cpython.datetime cimport datetime, datetime_new, import_datetime
14+ from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo
1715from cpython.version cimport PY_VERSION_HEX
1816import_datetime()
1917
@@ -37,6 +35,7 @@ from pandas._config import get_option
3735from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
3836from pandas._libs.tslibs.nattype import nat_strings, NaT
3937from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size
38+ from pandas._libs.tslibs.frequencies cimport get_rule_month
4039
4140cdef extern from " ../src/headers/portable.h" :
4241 int getdigit_ascii(char c, int default) nogil
@@ -86,16 +85,15 @@ cdef inline int _parse_4digit(const char* s):
8685 return result
8786
8887
89- cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
88+ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
9089 """
9190 Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
91+
9292 At the beginning function tries to parse date in MM/DD/YYYY format, but
9393 if month > 12 - in DD/MM/YYYY (`dayfirst == False`).
9494 With `dayfirst == True` function makes an attempt to parse date in
 9595 DD/MM/YYYY, if an attempt is wrong - in MM/DD/YYYY
9696
97- Note
98- ----
9997 For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-.
10098 For MM/YYYY: delimiter can be a space or one of /-
10199 If `date_string` can't be converted to date, then function returns
@@ -104,11 +102,13 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
104102 Parameters
105103 ----------
106104 date_string : str
107- dayfirst : bint
105+ dayfirst : bool
108106
 109107 Returns
 110108 -------
111- datetime, resolution
109+ datetime or None
110+ str or None
111+ Describing resolution of the parsed string.
112112 """
113113 cdef:
114114 const char * buf
@@ -156,18 +156,19 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
156156 raise DateParseError(f" Invalid date specified ({month}/{day})" )
157157
158158
159- cdef inline bint does_string_look_like_time(object parse_string):
159+ cdef inline bint does_string_look_like_time(str parse_string):
160160 """
161161 Checks whether given string is a time: it has to start either from
162162 H:MM or from HH:MM, and hour and minute values must be valid.
163163
164164 Parameters
165165 ----------
166- date_string : str
166+ parse_string : str
167167
 168168 Returns
 169169 -------
170- whether given string is a time
170+ bool
171+ Whether given string is potentially a time.
171172 """
172173 cdef:
173174 const char * buf
@@ -188,9 +189,10 @@ cdef inline bint does_string_look_like_time(object parse_string):
188189 return 0 <= hour <= 23 and 0 <= minute <= 59
189190
190191
191- def parse_datetime_string (date_string , freq = None , dayfirst = False ,
192+ def parse_datetime_string (date_string: str , freq = None , dayfirst = False ,
192193 yearfirst = False , **kwargs ):
193- """ parse datetime string, only returns datetime.
194+ """
195+ Parse datetime string, only returns datetime.
 194196 Also takes care of special handling for strings matching time patterns.
195197
196198 Returns
@@ -270,16 +272,17 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None):
270272 return res
271273
272274
273- cdef parse_datetime_string_with_reso(date_string, freq = None , dayfirst = False ,
275+ cdef parse_datetime_string_with_reso(str date_string, freq = None , dayfirst = False ,
274276 yearfirst = False ):
275- """ parse datetime string, only returns datetime
277+ """
278+ Parse datetime string and try to identify its resolution.
276279
277280 Returns
278281 -------
279- parsed : datetime
280- parsed2 : datetime/dateutil.parser._result
281- reso : str
282- inferred resolution
282+ datetime
283+ datetime/dateutil.parser._result
284+ str
285+ Inferred resolution of the parsed string.
283286
284287 Raises
285288 ------
@@ -315,18 +318,19 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
315318 return parsed, parsed, reso
316319
317320
318- cpdef bint _does_string_look_like_datetime(object py_string):
321+ cpdef bint _does_string_look_like_datetime(str py_string):
319322 """
320323 Checks whether given string is a datetime: it has to start with '0' or
321324 be greater than 1000.
322325
323326 Parameters
324327 ----------
325- py_string: object
328+ py_string: str
326329
327330 Returns
328331 -------
329- whether given string is a datetime
332+ bool
333+ Whether given string is potentially a datetime.
330334 """
331335 cdef:
332336 const char * buf
@@ -370,9 +374,6 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
370374 # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
371375 assert isinstance (date_string, str )
372376
373- # len(date_string) == 0
374- # should be NaT???
375-
376377 if date_string in nat_strings:
377378 return NaT, NaT, ' '
378379
@@ -427,7 +428,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
427428 if freq is not None :
428429 # hack attack, #1228
429430 try :
430- mnum = MONTH_NUMBERS[_get_rule_month (freq)] + 1
431+ mnum = MONTH_NUMBERS[get_rule_month (freq)] + 1
431432 except (KeyError , ValueError ):
432433 raise DateParseError(f' Unable to retrieve month '
433434 f' information from given '
@@ -467,21 +468,16 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
467468 raise ValueError (f' Unable to parse {date_string}' )
468469
469470
470- cdef dateutil_parse(object timestr, object default, ignoretz = False ,
471+ cdef dateutil_parse(str timestr, object default, ignoretz = False ,
471472 tzinfos = None , dayfirst = None , yearfirst = None ):
472473 """ lifted from dateutil to get resolution"""
473474
474475 cdef:
475- object fobj, res, attr, ret, tzdata
476+ object res, attr, ret, tzdata
476477 object reso = None
477478 dict repl = {}
478479
479- fobj = StringIO(str (timestr))
480- res = DEFAULTPARSER._parse(fobj, dayfirst = dayfirst, yearfirst = yearfirst)
481-
482- # dateutil 2.2 compat
483- if isinstance (res, tuple ): # PyTuple_Check
484- res, _ = res
480+ res, _ = DEFAULTPARSER._parse(timestr, dayfirst = dayfirst, yearfirst = yearfirst)
485481
486482 if res is None :
487483 raise ValueError (f" Unknown datetime string format, unable to parse: {timestr}" )
@@ -507,20 +503,22 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
507503 ret = ret + relativedelta.relativedelta(weekday = res.weekday)
508504 if not ignoretz:
509505 if callable (tzinfos) or tzinfos and res.tzname in tzinfos:
506+ # Note: as of 1.0 this is not reached because
507+ # we never pass tzinfos, see GH#22234
510508 if callable (tzinfos):
511509 tzdata = tzinfos(res.tzname, res.tzoffset)
512510 else :
513511 tzdata = tzinfos.get(res.tzname)
514- if isinstance (tzdata, datetime. tzinfo):
515- tzinfo = tzdata
512+ if isinstance (tzdata, tzinfo):
513+ new_tzinfo = tzdata
516514 elif isinstance (tzdata, str ):
517- tzinfo = _dateutil_tzstr(tzdata)
515+ new_tzinfo = _dateutil_tzstr(tzdata)
518516 elif isinstance (tzdata, int ):
519- tzinfo = tzoffset(res.tzname, tzdata)
517+ new_tzinfo = tzoffset(res.tzname, tzdata)
520518 else :
521519 raise ValueError (" offset must be tzinfo subclass, "
522520 " tz string, or int offset" )
523- ret = ret.replace(tzinfo = tzinfo )
521+ ret = ret.replace(tzinfo = new_tzinfo )
524522 elif res.tzname and res.tzname in time.tzname:
525523 ret = ret.replace(tzinfo = _dateutil_tzlocal())
526524 elif res.tzoffset == 0 :
@@ -530,27 +528,6 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
530528 return ret, reso
531529
532530
533- cdef object _get_rule_month(object source, object default = ' DEC' ):
534- """
535- Return starting month of given freq, default is December.
536-
537- Example
538- -------
539- >>> _get_rule_month('D')
540- 'DEC'
541-
542- >>> _get_rule_month('A-JAN')
543- 'JAN'
544- """
545- if hasattr (source, ' freqstr' ):
546- source = source.freqstr
547- source = source.upper()
548- if ' -' not in source:
549- return default
550- else :
551- return source.split(' -' )[1 ]
552-
553-
554531# ----------------------------------------------------------------------
555532# Parsing for type-inference
556533
@@ -939,14 +916,14 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
939916
940917 Parameters
941918 ----------
942- date_cols : tuple of numpy arrays
919+ date_cols : tuple[ndarray]
943920 keep_trivial_numbers : bool, default True
944921 if True and len(date_cols) == 1, then
945922 conversion (to string from integer/float zero) is not performed
946923
947924 Returns
948925 -------
949- arr_of_rows : ndarray (dtype= object)
926+ arr_of_rows : ndarray[ object]
950927
951928 Examples
952929 --------
@@ -1004,6 +981,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
1004981 item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
1005982 list_to_join[col_idx] = convert_to_unicode(item, False )
1006983 PyArray_ITER_NEXT(it)
1007- result_view[row_idx] = PyUnicode_Join( ' ' , list_to_join)
984+ result_view[row_idx] = " " .join( list_to_join)
1008985
1009986 return result
0 commit comments