From f1fada62a67ce93dbca9513aea2cfa4e80f771d1 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Wed, 12 Feb 2025 21:51:26 -0500 Subject: [PATCH 1/3] Add additional pattern for datetime w/o timezone --- src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 803b1f5..9e1fc8b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,6 +80,7 @@ mod format { pub const YYYYMMDDHHMMSS_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M:%S %#z"; pub const YYYYMMDDHHMMSS_HYPHENATED_ZULU: &str = "%Y-%m-%d %H:%M:%SZ"; pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_OFFSET: &str = "%Y-%m-%dT%H:%M:%S%#z"; + pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_ZULU: &str = "%Y-%m-%dT%H:%M:%SZ"; pub const YYYYMMDDHHMMSS_T_SEP_HYPHENATED_SPACE_OFFSET: &str = "%Y-%m-%dT%H:%M:%S %#z"; pub const YYYYMMDDHHMMS_T_SEP: &str = "%Y-%m-%dT%H:%M:%S"; pub const UTC_OFFSET: &str = "UTC%#z"; @@ -88,7 +89,7 @@ mod format { /// Whether the pattern ends in the character `Z`. pub(crate) fn is_zulu(pattern: &str) -> bool { - pattern == YYYYMMDDHHMMSS_HYPHENATED_ZULU + pattern == YYYYMMDDHHMMSS_HYPHENATED_ZULU || pattern == YYYYMMDDHHMMSS_T_SEP_HYPHENATED_ZULU } /// Patterns for datetimes with timezones. @@ -113,10 +114,11 @@ mod format { /// Patterns for datetimes without timezones. /// /// These are in decreasing order of length. - pub(crate) const PATTERNS_NO_TZ: [(&str, usize); 8] = [ + pub(crate) const PATTERNS_NO_TZ: [(&str, usize); 9] = [ (YYYYMMDDHHMMSS, 29), (POSIX_LOCALE, 24), (YYYYMMDDHHMMSS_HYPHENATED_ZULU, 20), + (YYYYMMDDHHMMSS_T_SEP_HYPHENATED_ZULU, 20), (YYYYMMDDHHMMS_T_SEP, 19), (YYYYMMDDHHMMS, 19), (YYYY_MM_DD_HH_MM, 16), From 8aee979de590d135efbf4f47996e4d86c0e2e730 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 13 Feb 2025 19:16:36 -0500 Subject: [PATCH 2/3] Parse reference date and delta from same string Change the `parse_datetime()` function so that it parses both a reference date and a time delta from one string. The new implementation attempts to parse the datetime from the longest possible prefix of the string. The remainder of the string is parsed as the time delta. This allows us to parse more combinations of reference dates and time deltas more easily. Fixes #104 --- src/lib.rs | 152 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 124 insertions(+), 28 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9e1fc8b..342078d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -234,8 +234,34 @@ pub fn parse_datetime_at_date + Clone>( // TODO: Replace with a proper customiseable parsing solution using `nom`, `grmtools`, or // similar - // Formats with offsets don't require NaiveDateTime workaround - // + // Try to parse a reference date first. Try parsing from longest + // pattern to shortest pattern. If a reference date can be parsed, + // then try to parse a time delta from the remaining slice. If no + // reference date could be parsed, then try to parse the entire + // string as a time delta. If no time delta could be parsed, + // return an error. + let (ref_date, n) = match parse_reference_date(date, s.as_ref()) { + Some((ref_date, n)) => (ref_date, n), + None => { + let tz = TimeZone::from_offset(date.offset()); + match date.naive_local().and_local_timezone(tz) { + MappedLocalTime::Single(ref_date) => (ref_date, 0), + _ => return Err(ParseDateTimeError::InvalidInput), + } + } + }; + parse_relative_time_at_date(ref_date, &s.as_ref()[n..]) +} + +/// Parse an absolute datetime from a prefix of s, if possible. +/// +/// Try to parse the longest possible absolute datetime at the beginning +/// of string `s`. Return the parsed datetime and the index in `s` at +/// which the datetime ended. +fn parse_reference_date(date: DateTime, s: S) -> Option<(DateTime, usize)> +where + S: AsRef, +{ // HACK: if the string ends with a single digit preceded by a + or - // sign, then insert a 0 between the sign and the digit to make it // possible for `chrono` to parse it. @@ -244,7 +270,11 @@ pub fn parse_datetime_at_date + Clone>( for (fmt, n) in format::PATTERNS_TZ { if tmp_s.len() >= n { if let Ok(parsed) = DateTime::parse_from_str(&tmp_s[0..n], fmt) { - return Ok(parsed); + if tmp_s == s.as_ref() { + return Some((parsed, n)); + } else { + return Some((parsed, n - 1)); + } } } } @@ -261,11 +291,11 @@ pub fn parse_datetime_at_date + Clone>( .unwrap() .from_local_datetime(&parsed) { - MappedLocalTime::Single(datetime) => return Ok(datetime), - _ => return Err(ParseDateTimeError::InvalidInput), + MappedLocalTime::Single(datetime) => return Some((datetime, n)), + _ => return None, } } else if let Ok(dt) = naive_dt_to_fixed_offset(date, parsed) { - return Ok(dt); + return Some((dt, n)); } } } @@ -289,13 +319,13 @@ pub fn parse_datetime_at_date + Clone>( let dt = DateTime::::from(beginning_of_day); - return Ok(dt); + return Some((dt, s.as_ref().len())); } // Parse epoch seconds if let Ok(timestamp) = parse_timestamp(s.as_ref()) { if let Some(timestamp_date) = DateTime::from_timestamp(timestamp, 0) { - return Ok(timestamp_date.into()); + return Some((timestamp_date.into(), s.as_ref().len())); } } @@ -305,7 +335,7 @@ pub fn parse_datetime_at_date + Clone>( if let Ok(parsed) = NaiveDate::parse_from_str(&s.as_ref()[0..n], fmt) { let datetime = parsed.and_hms_opt(0, 0, 0).unwrap(); if let Ok(dt) = naive_dt_to_fixed_offset(date, datetime) { - return Ok(dt); + return Some((dt, n)); } } } @@ -320,25 +350,21 @@ pub fn parse_datetime_at_date + Clone>( if ts.len() == n + 12 { let f = format::YYYYMMDDHHMM.to_owned() + fmt; if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) { - return Ok(parsed); + if tmp_s == s.as_ref() { + return Some((parsed, n)); + } else { + return Some((parsed, n - 1)); + } } } } - // Parse relative time. - if let Ok(datetime) = parse_relative_time_at_date(date, s.as_ref()) { - return Ok(DateTime::::from(datetime)); - } - // parse time only dates if let Some(date_time) = parse_time_only_str::parse_time_only(date, s.as_ref()) { - return Ok(date_time); + return Some((date_time, s.as_ref().len())); } - // Default parse and failure - s.as_ref() - .parse() - .map_err(|_| (ParseDateTimeError::InvalidInput)) + None } // Convert NaiveDateTime to DateTime by assuming the offset @@ -664,14 +690,10 @@ mod tests { assert!(crate::parse_datetime("bogus +1 day").is_err()); } - // TODO Re-enable this when we parse the absolute datetime and the - // time delta separately, see - // . - // - // #[test] - // fn test_parse_invalid_delta() { - // assert!(crate::parse_datetime("1997-01-01 bogus").is_err()); - // } + #[test] + fn test_parse_invalid_delta() { + assert!(crate::parse_datetime("1997-01-01 bogus").is_err()); + } #[test] fn test_parse_datetime_tz_nodelta() { @@ -743,6 +765,80 @@ mod tests { } } + #[test] + fn test_parse_datetime_tz_delta() { + std::env::set_var("TZ", "UTC0"); + + // 1998-01-01 + let expected = chrono::NaiveDate::from_ymd_opt(1998, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .and_utc() + .fixed_offset(); + + for s in [ + "1997-01-01 00:00:00 +0000 +1 year", + "1997-01-01 00:00:00 +00 +1 year", + "199701010000 +0000 +1 year", + "199701010000UTC+0000 +1 year", + "199701010000Z+0000 +1 year", + "1997-01-01T00:00:00Z +1 year", + "1997-01-01 00:00 +0000 +1 year", + "1997-01-01 00:00:00 +0000 +1 year", + "1997-01-01T00:00:00+0000 +1 year", + "1997-01-01T00:00:00+00 +1 year", + ] { + let actual = crate::parse_datetime(s).unwrap(); + assert_eq!(actual, expected); + } + } + + #[test] + fn test_parse_datetime_notz_delta() { + std::env::set_var("TZ", "UTC0"); + let expected = chrono::NaiveDate::from_ymd_opt(1998, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .and_utc() + .fixed_offset(); + + for s in [ + "1997-01-01 00:00:00.000000000 +1 year", + "Wed Jan 1 00:00:00 1997 +1 year", + "1997-01-01T00:00:00 +1 year", + "1997-01-01 00:00:00 +1 year", + "1997-01-01 00:00 +1 year", + "199701010000.00 +1 year", + "199701010000 +1 year", + ] { + let actual = crate::parse_datetime(s).unwrap(); + assert_eq!(actual, expected); + } + } + + #[test] + fn test_parse_date_notz_delta() { + std::env::set_var("TZ", "UTC0"); + let expected = chrono::NaiveDate::from_ymd_opt(1998, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .and_utc() + .fixed_offset(); + + for s in [ + "1997-01-01 +1 year", + "19970101 +1 year", + "01/01/1997 +1 year", + "01/01/97 +1 year", + ] { + let actual = crate::parse_datetime(s).unwrap(); + assert_eq!(actual, expected); + } + } + #[test] fn test_time_only() { use chrono::{FixedOffset, Local}; From d4353d0888f0ee980c641563eb4435b850a79818 Mon Sep 17 00:00:00 2001 From: jfinkels Date: Sun, 16 Feb 2025 11:16:23 -0500 Subject: [PATCH 3/3] Use str.ends_with for patterns ending in Z Co-authored-by: Daniel Hofstetter --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 342078d..fc3e7df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -89,7 +89,7 @@ mod format { /// Whether the pattern ends in the character `Z`. pub(crate) fn is_zulu(pattern: &str) -> bool { - pattern == YYYYMMDDHHMMSS_HYPHENATED_ZULU || pattern == YYYYMMDDHHMMSS_T_SEP_HYPHENATED_ZULU + pattern.ends_with('Z') } /// Patterns for datetimes with timezones.