From f8adcb6c88a2acb7cd86207fed64584cd4d1cc86 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 9 Feb 2025 13:47:31 -0500 Subject: [PATCH 1/3] Add passing test for parsing datetime ending in Z Add a passing unit test for parsing a datetime ending in the letter Z, as in 2023-06-03 12:00:01Z This is treated as a datetime in the UTC time zone. --- src/lib.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 4f926ca..457c8c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -575,4 +575,14 @@ mod tests { assert_eq!(result, Err(ParseDateTimeError::InvalidInput)); } } + + #[test] + fn test_datetime_ending_in_z() { + use crate::parse_datetime; + use chrono::{TimeZone, Utc}; + + let actual = parse_datetime("2023-06-03 12:00:01Z").unwrap(); + let expected = Utc.with_ymd_and_hms(2023, 6, 3, 12, 0, 1).unwrap(); + assert_eq!(actual, expected); + } } From badc8876069add9ffdcb92cca6bb599082e7ca1f Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 9 Feb 2025 13:47:52 -0500 Subject: [PATCH 2/3] Greedy parsing of datetime before time delta Simulate greedy parsing of an absolute datetime in the prefix of a string before parsing a subsequent time delta in the suffix of the string. This does not change the behavior of `parse_datetime`, it just prepares the code for a future change that allows parsing both the absolute datetime and the time delta from the same string. Greedy parsing is implemented by iterating over a list of patterns in decreasing order of length so that longer patterns are tried before shorter patterns. This guarantees that if there is an absolute datetime present at the beginning of the string, then it will definitely be parsed and the remaining part of the string is assumed to contain a time delta. --- src/lib.rs | 160 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 116 insertions(+), 44 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 457c8c9..c10bf83 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,8 +21,8 @@ mod parse_time_only_str; mod parse_weekday; use chrono::{ - DateTime, Datelike, Duration, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone, - Timelike, + DateTime, Datelike, Duration, FixedOffset, Local, LocalResult, MappedLocalTime, NaiveDateTime, + TimeZone, Timelike, }; use parse_relative_time::parse_relative_time_at_date; @@ -78,11 +78,88 @@ mod format { pub const YYYYMMDDHHMM_ZULU_OFFSET: &str = "%Y%m%d%H%MZ%z"; pub const YYYYMMDDHHMM_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M %z"; pub const YYYYMMDDHHMMSS_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M:%S %#z"; + pub const YYYYMMDDHHMMSS_HYPHENATED_ZULU: &str = "%Y-%m-%d %H:%M:%SZ"; pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET: &str = "%Y-%m-%dT%H:%M:%S%#z"; + pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_SPACE_OFFSET: &str = "%Y-%m-%dT%H:%M:%S %#z"; pub const YYYYMMDDHHMMS_T_SEP: &str = "%Y-%m-%dT%H:%M:%S"; pub const UTC_OFFSET: &str = "UTC%#z"; pub const ZULU_OFFSET: &str = "Z%#z"; pub const NAKED_OFFSET: &str = "%#z"; + + /// Whether the pattern ends in the character `Z`. + pub(crate) fn is_zulu(pattern: &str) -> bool { + pattern == YYYYMMDDHHMMSS_HYPHENATED_ZULU + } + + /// Patterns for datetimes with timezones. + /// + /// These are in decreasing order of length. The same pattern may + /// appear multiple times with different lengths if the pattern + /// accepts input strings of different lengths. For example, the + /// specifier `%#z` accepts two-digit time zone offsets (`+00`) + /// and four-digit time zone offsets (`+0000`). + pub(crate) const PATTERNS_TZ: [(&str, usize); 9] = [ + (YYYYMMDDHHMMSS_HYPHENATED_OFFSET, 25), + (YYYYMMDDHHMMSS_T_SEP_HYPHENATED_SPACE_OFFSET, 25), + (YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET, 24), + (YYYYMMDDHHMMSS_HYPHENATED_OFFSET, 23), + (YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET, 22), + (YYYYMMDDHHMM_HYPHENATED_OFFSET, 22), + (YYYYMMDDHHMM_UTC_OFFSET, 20), + (YYYYMMDDHHMM_OFFSET, 18), + (YYYYMMDDHHMM_ZULU_OFFSET, 18), + ]; + + /// Patterns for datetimes without timezones. + /// + /// These are in decreasing order of length. + pub(crate) const PATTERNS_NO_TZ: [(&str, usize); 8] = [ + (YYYYMMDDHHMMSS, 29), + (POSIX_LOCALE, 24), + (YYYYMMDDHHMMSS_HYPHENATED_ZULU, 20), + (YYYYMMDDHHMMS_T_SEP, 19), + (YYYYMMDDHHMMS, 19), + (YYYY_MM_DD_HH_MM, 16), + (YYYYMMDDHHMM_DOT_SS, 15), + (YYYYMMDDHHMM, 12), + ]; + + /// Patterns for dates with neither times nor timezones. + /// + /// These are in decreasing order of length. The same pattern may + /// appear multiple times with different lengths if the pattern + /// accepts input strings of different lengths. For example, the + /// specifier `%m` accepts one-digit month numbers (like `2`) and + /// two-digit month numbers (like `02` or `12`). + pub(crate) const PATTERNS_DATE_NO_TZ: [(&str, usize); 8] = [ + (ISO_8601, 10), + (MMDDYYYY_SLASH, 10), + (ISO_8601, 9), + (MMDDYYYY_SLASH, 9), + (ISO_8601, 8), + (MMDDYY_SLASH, 8), + (MMDDYYYY_SLASH, 8), + (ISO_8601_NO_SEP, 8), + ]; + + /// Patterns for lone timezone offsets. + /// + /// These are in decreasing order of length. The same pattern may + /// appear multiple times with different lengths if the pattern + /// accepts input strings of different lengths. For example, the + /// specifier `%#z` accepts two-digit time zone offsets (`+00`) + /// and four-digit time zone offsets (`+0000`). + pub(crate) const PATTERNS_OFFSET: [(&str, usize); 9] = [ + (UTC_OFFSET, 9), + (UTC_OFFSET, 8), + (ZULU_OFFSET, 7), + (UTC_OFFSET, 6), + (ZULU_OFFSET, 6), + (NAKED_OFFSET, 6), + (NAKED_OFFSET, 5), + (ZULU_OFFSET, 4), + (NAKED_OFFSET, 3), + ]; } /// Parses a time string and returns a `DateTime` representing the @@ -161,33 +238,33 @@ pub fn parse_datetime_at_date + Clone>( // sign, then insert a 0 between the sign and the digit to make it // possible for `chrono` to parse it. let pattern = Regex::new(r"([\+-])(\d)$").unwrap(); - let s = pattern.replace(s.as_ref(), "${1}0${2}"); - for fmt in [ - format::YYYYMMDDHHMM_OFFSET, - format::YYYYMMDDHHMM_HYPHENATED_OFFSET, - format::YYYYMMDDHHMMSS_HYPHENATED_OFFSET, - format::YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET, - format::YYYYMMDDHHMM_UTC_OFFSET, - format::YYYYMMDDHHMM_ZULU_OFFSET, - ] { - if let Ok(parsed) = DateTime::parse_from_str(s.as_ref(), fmt) { - return Ok(parsed); + let tmp_s = pattern.replace(s.as_ref(), "${1}0${2}"); + for (fmt, n) in format::PATTERNS_TZ { + if tmp_s.len() >= n { + if let Ok(parsed) = DateTime::parse_from_str(&tmp_s[0..n], fmt) { + return Ok(parsed); + } } } // Parse formats with no offset, assume local time - for fmt in [ - format::YYYYMMDDHHMMS_T_SEP, - format::YYYYMMDDHHMM, - format::YYYYMMDDHHMMS, - format::YYYYMMDDHHMMSS, - format::YYYY_MM_DD_HH_MM, - format::YYYYMMDDHHMM_DOT_SS, - format::POSIX_LOCALE, - ] { - if let Ok(parsed) = NaiveDateTime::parse_from_str(s.as_ref(), fmt) { - if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) { - return Ok(dt); + for (fmt, n) in format::PATTERNS_NO_TZ { + if s.as_ref().len() >= n { + if let Ok(parsed) = NaiveDateTime::parse_from_str(&s.as_ref()[0..n], fmt) { + // Special case: `chrono` can only parse a datetime like + // `2000-01-01 01:23:45Z` as a naive datetime, so we + // manually force it to be in UTC. + if format::is_zulu(fmt) { + match FixedOffset::east_opt(0) + .unwrap() + .from_local_datetime(&parsed) + { + MappedLocalTime::Single(datetime) => return Ok(datetime), + _ => return Err(ParseDateTimeError::InvalidInput), + } + } else if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) { + return Ok(dt); + } } } } @@ -222,16 +299,13 @@ pub fn parse_datetime_at_date + Clone>( let ts = s.as_ref().to_owned() + " 0000"; // Parse date only formats - assume midnight local timezone - for fmt in [ - format::ISO_8601, - format::ISO_8601_NO_SEP, - format::MMDDYYYY_SLASH, - format::MMDDYY_SLASH, - ] { - let f = fmt.to_owned() + " %H%M"; - if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts, &f) { - if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) { - return Ok(dt); + for (fmt, n) in format::PATTERNS_DATE_NO_TZ { + if ts.len() >= n + 5 { + let f = fmt.to_owned() + " %H%M"; + if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts[0..n + 5], &f) { + if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) { + return Ok(dt); + } } } } @@ -240,15 +314,13 @@ pub fn parse_datetime_at_date + Clone>( // offsets, so instead we replicate parse_date behaviour by getting // the current date with local, and create a date time string at midnight, // before trying offset suffixes - let ts = format!("{}0000{}", date.format("%Y%m%d"), s); - for fmt in [ - format::UTC_OFFSET, - format::ZULU_OFFSET, - format::NAKED_OFFSET, - ] { - let f = format::YYYYMMDDHHMM.to_owned() + fmt; - if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) { - return Ok(parsed); + let ts = format!("{}0000{}", date.format("%Y%m%d"), tmp_s.as_ref()); + for (fmt, n) in format::PATTERNS_OFFSET { + if ts.len() == n + 12 { + let f = format::YYYYMMDDHHMM.to_owned() + fmt; + if let Ok(parsed) = DateTime::parse_from_str(&ts[0..n + 12], &f) { + return Ok(parsed); + } } } From 2c55db21fa43f8eaeb09c59cef86fa8e40b44cdf Mon Sep 17 00:00:00 2001 From: jfinkels Date: Tue, 11 Feb 2025 20:05:55 -0500 Subject: [PATCH 3/3] Avoid unnecessary slicing when parsing offset Co-authored-by: Daniel Hofstetter --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c10bf83..20e42ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -318,7 +318,7 @@ pub fn parse_datetime_at_date + Clone>( for (fmt, n) in format::PATTERNS_OFFSET { if ts.len() == n + 12 { let f = format::YYYYMMDDHHMM.to_owned() + fmt; - if let Ok(parsed) = DateTime::parse_from_str(&ts[0..n + 12], &f) { + if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) { return Ok(parsed); } }