@@ -346,8 +346,6 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
346346/*
347347 * Parses (almost) standard ISO 8601 date strings. The differences are:
348348 *
349- * + The date "20100312" is parsed as the year 20100312, not as
350- * equivalent to "2010-03-12". The '-' in the dates are not optional.
351349 * + Only seconds may have a decimal point, with up to 18 digits after it
352350 * (maximum attoseconds precision).
353351 * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
@@ -396,6 +394,16 @@ parse_iso_8601_datetime(char *str, int len,
396394 char * substr , sublen ;
397395 PANDAS_DATETIMEUNIT bestunit ;
398396
397+ /* If the date components are separated by one of the valid separators,
398+ * months/days without leading 0s will be parsed
399+ * (though this is not ISO 8601). If the components aren't separated,
400+ * an error code will be returned because the date is ambiguous.
401+ */
402+ int has_sep = 0 ;
403+ char sep ;
404+ char valid_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
405+ int valid_sep_len = 5 ;
406+
399407 /* Initialize the output to all zeros */
400408 memset (out , 0 , sizeof (pandas_datetimestruct ));
401409 out -> month = 1 ;
@@ -523,12 +531,16 @@ parse_iso_8601_datetime(char *str, int len,
523531 goto parse_error ;
524532 }
525533
526- /* PARSE THE YEAR (digits until the '-' character ) */
534+ /* PARSE THE YEAR (4 digits ) */
527535 out -> year = 0 ;
528- while (sublen > 0 && isdigit (* substr )) {
529- out -> year = 10 * out -> year + (* substr - '0' );
530- ++ substr ;
531- -- sublen ;
536+ if (sublen >= 4 && isdigit (substr [0 ]) && isdigit (substr [1 ]) &&
537+ isdigit (substr [2 ]) && isdigit (substr [3 ])) {
538+
539+ out -> year = 1000 * (substr [0 ] - '0' ) + 100 * (substr [1 ] - '0' ) +
540+ 10 * (substr [2 ] - '0' ) + (substr [3 ] - '0' );
541+
542+ substr += 4 ;
543+ sublen -= 4 ;;
532544 }
533545
534546 /* Negate the year if necessary */
@@ -538,29 +550,49 @@ parse_iso_8601_datetime(char *str, int len,
538550 /* Check whether it's a leap-year */
539551 year_leap = is_leapyear (out -> year );
540552
541- /* Next character must be a '-' or the end of the string */
553+ /* Next character must be a separator, start of month or end */
542554 if (sublen == 0 ) {
543555 if (out_local != NULL ) {
544556 * out_local = 0 ;
545557 }
546558 bestunit = PANDAS_FR_Y ;
547559 goto finish ;
548560 }
549- else if (* substr == '-' ) {
550- ++ substr ;
551- -- sublen ;
552- }
553- else {
554- goto parse_error ;
561+ else if (!isdigit (* substr )) {
562+ for (i = 0 ; i < valid_sep_len ; ++ i ) {
563+ if (* substr == valid_sep [i ]) {
564+ has_sep = 1 ;
565+ sep = valid_sep [i ];
566+ ++ substr ;
567+ -- sublen ;
568+ break ;
569+ }
570+ }
571+ if (i == valid_sep_len ) {
572+ goto parse_error ;
573+ }
555574 }
556575
557- /* Can't have a trailing '-' */
576+ /* Can't have a trailing sep */
558577 if (sublen == 0 ) {
559578 goto parse_error ;
560579 }
561580
581+
562582 /* PARSE THE MONTH (2 digits, or 1 digit when a separator is used) */
563- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
583+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
584+ || (sublen == 1 && isdigit (substr [0 ])))) {
585+ out -> month = (substr [0 ] - '0' );
586+
587+ if (out -> month < 1 ) {
588+ PyErr_Format (PyExc_ValueError ,
589+ "Month out of range in datetime string \"%s\"" , str );
590+ goto error ;
591+ }
592+ ++ substr ;
593+ -- sublen ;
594+ }
595+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
564596 out -> month = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
565597
566598 if (out -> month < 1 || out -> month > 12 ) {
@@ -577,18 +609,22 @@ parse_iso_8601_datetime(char *str, int len,
577609
578610 /* Next character must be the separator, the start of the day, or the end of the string */
579611 if (sublen == 0 ) {
612+ /* dates of form YYYYMM are not valid */
613+ if (!has_sep ) {
614+ goto parse_error ;
615+ }
580616 if (out_local != NULL ) {
581617 * out_local = 0 ;
582618 }
583619 bestunit = PANDAS_FR_M ;
584620 goto finish ;
585621 }
586- else if (* substr == '-' ) {
622+ else if (has_sep && * substr == sep ) {
587623 ++ substr ;
588624 -- sublen ;
589625 }
590- else {
591- goto parse_error ;
626+ else if (! isdigit ( * substr )) {
627+ goto parse_error ;
592628 }
593629
594630 /* Can't have a trailing separator */
@@ -597,7 +633,19 @@ parse_iso_8601_datetime(char *str, int len,
597633 }
598634
599635 /* PARSE THE DAY (2 digits, or 1 digit when a separator is used) */
600- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
636+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
637+ || (sublen == 1 && isdigit (substr [0 ])))) {
638+ out -> day = (substr [0 ] - '0' );
639+
640+ if (out -> day < 1 ) {
641+ PyErr_Format (PyExc_ValueError ,
642+ "Day out of range in datetime string \"%s\"" , str );
643+ goto error ;
644+ }
645+ ++ substr ;
646+ -- sublen ;
647+ }
648+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
601649 out -> day = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
602650
603651 if (out -> day < 1 ||
@@ -633,14 +681,19 @@ parse_iso_8601_datetime(char *str, int len,
633681 if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
634682 out -> hour = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
635683
636- if (out -> hour < 0 || out -> hour >= 24 ) {
684+ if (out -> hour >= 24 ) {
637685 PyErr_Format (PyExc_ValueError ,
638686 "Hours out of range in datetime string \"%s\"" , str );
639687 goto error ;
640688 }
641689 substr += 2 ;
642690 sublen -= 2 ;
643691 }
692+ else if (sublen >= 1 && isdigit (substr [0 ])) {
693+ out -> hour = substr [0 ] - '0' ;
694+ ++ substr ;
695+ -- sublen ;
696+ }
644697 else {
645698 goto parse_error ;
646699 }
@@ -664,14 +717,19 @@ parse_iso_8601_datetime(char *str, int len,
664717 if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
665718 out -> min = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
666719
667- if (out -> hour < 0 || out -> min >= 60 ) {
720+ if (out -> min >= 60 ) {
668721 PyErr_Format (PyExc_ValueError ,
669722 "Minutes out of range in datetime string \"%s\"" , str );
670723 goto error ;
671724 }
672725 substr += 2 ;
673726 sublen -= 2 ;
674727 }
728+ else if (sublen >= 1 && isdigit (substr [0 ])) {
729+ out -> min = substr [0 ] - '0' ;
730+ ++ substr ;
731+ -- sublen ;
732+ }
675733 else {
676734 goto parse_error ;
677735 }
@@ -695,14 +753,19 @@ parse_iso_8601_datetime(char *str, int len,
695753 if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
696754 out -> sec = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
697755
698- if (out -> sec < 0 || out -> sec >= 60 ) {
756+ if (out -> sec >= 60 ) {
699757 PyErr_Format (PyExc_ValueError ,
700758 "Seconds out of range in datetime string \"%s\"" , str );
701759 goto error ;
702760 }
703761 substr += 2 ;
704762 sublen -= 2 ;
705763 }
764+ else if (sublen >= 1 && isdigit (substr [0 ])) {
765+ out -> sec = substr [0 ] - '0' ;
766+ ++ substr ;
767+ -- sublen ;
768+ }
706769 else {
707770 goto parse_error ;
708771 }
@@ -781,6 +844,12 @@ parse_iso_8601_datetime(char *str, int len,
781844 }
782845
783846parse_timezone :
847+ /* trim any whitespace between time/timezone */
848+ while (sublen > 0 && isspace (* substr )) {
849+ ++ substr ;
850+ -- sublen ;
851+ }
852+
784853 if (sublen == 0 ) {
785854 // Unlike NumPy, treating no time zone as naive
786855 goto finish ;
@@ -832,6 +901,11 @@ parse_iso_8601_datetime(char *str, int len,
832901 goto error ;
833902 }
834903 }
904+ else if (sublen >= 1 && isdigit (substr [0 ])) {
905+ offset_hour = substr [0 ] - '0' ;
906+ ++ substr ;
907+ -- sublen ;
908+ }
835909 else {
836910 goto parse_error ;
837911 }
@@ -856,6 +930,11 @@ parse_iso_8601_datetime(char *str, int len,
856930 goto error ;
857931 }
858932 }
933+ else if (sublen >= 1 && isdigit (substr [0 ])) {
934+ offset_minute = substr [0 ] - '0' ;
935+ ++ substr ;
936+ -- sublen ;
937+ }
859938 else {
860939 goto parse_error ;
861940 }
0 commit comments