From e53aabd914618e7ecbd5d37c090db08e1b27e754 Mon Sep 17 00:00:00 2001 From: Ben Schofield Date: Sat, 3 Jun 2023 09:58:36 -0700 Subject: [PATCH 1/6] add datetime parser Add a relaxed datetime parser. This datetime parser functions by using `chrono`'s own parsing utilities and a try/succeed approach to parsing. This implementation of the datetime parser has some drawbacks and some positives. On the positive side: - it was easy to implement - it is easy to add more datetime formats to In order to add additionally supported formats, a developer can add the required format string to the `format` mod in `parse_datetime.rs`, and then add it as a potential format to the relevant `fmts` vec. On the negative: - It is not easily customisable beyond the supported `chrono` parsing formats. E.g., `chrono` does not currently support parsing offsets without trailing zeros. `from_str("UTC+1")` should return a valid response but `chrono` fails to parse this. - Because it is an attempt-driven parser, it is likely not that performant. I have not done any performance testing as part of this change, but I would expect a custom parser to perform much better. --- src/lib.rs | 3 + src/parse_datetime.rs | 216 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 src/parse_datetime.rs diff --git a/src/lib.rs b/src/lib.rs index b53891a..21a4b46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,9 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
+// Expose parse_datetime +pub mod parse_datetime; + use chrono::{Duration, Local, NaiveDate, Utc}; use regex::{Error as RegexError, Regex}; use std::error::Error; diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs new file mode 100644 index 0000000..5e031ac --- /dev/null +++ b/src/parse_datetime.rs @@ -0,0 +1,216 @@ +use chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone}; + +use crate::ParseDurationError; + +/// Formats that parse input can take. +/// Taken from `touch` core util +mod format { + pub(crate) const ISO_8601: &str = "%Y-%m-%d"; + pub(crate) const ISO_8601_NO_SEP: &str = "%Y%m%d"; + pub(crate) const POSIX_LOCALE: &str = "%a %b %e %H:%M:%S %Y"; + pub(crate) const YYYYMMDDHHMM_DOT_SS: &str = "%Y%m%d%H%M.%S"; + pub(crate) const YYYYMMDDHHMMSS: &str = "%Y-%m-%d %H:%M:%S.%f"; + pub(crate) const YYYYMMDDHHMMS: &str = "%Y-%m-%d %H:%M:%S"; + pub(crate) const YYYY_MM_DD_HH_MM: &str = "%Y-%m-%d %H:%M"; + pub(crate) const YYYYMMDDHHMM: &str = "%Y%m%d%H%M"; + pub(crate) const YYYYMMDDHHMM_OFFSET: &str = "%Y%m%d%H%M %z"; + pub(crate) const YYYYMMDDHHMM_UTC_OFFSET: &str = "%Y%m%d%H%MUTC%z"; + pub(crate) const YYYYMMDDHHMM_ZULU_OFFSET: &str = "%Y%m%d%H%MZ%z"; + pub(crate) const YYYYMMDDHHMM_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M %z"; + pub(crate) const YYYYMMDDHHMMS_T_SEP: &str = "%Y-%m-%dT%H:%M:%S"; + pub(crate) const UTC_OFFSET: &str = "UTC%#z"; + pub(crate) const ZULU_OFFSET: &str = "Z%#z"; +} + +/// Loosely parses a time string and returns a `DateTime` representing the +/// absolute time of the string. +/// +/// # Arguments +/// +/// * `s` - A string slice representing the time. 
+/// +/// # Examples +/// +/// ``` +/// use chrono::{DateTime, Utc}; +/// let time = humantime_to_duration::parse_datetime::from_str("2023-06-03 12:00:01Z"); +/// assert_eq!(time.unwrap(), Utc.with_ymd_and_hms(2023, 06, 03, 12, 00, 01).unwrap()); +/// ``` +/// +/// # Supported formats +/// +/// The function supports the following formats for time: +/// +/// * ISO formats +/// * timezone offsets, e.g., "UTC-0100" +/// +/// # Returns +/// +/// * `Ok(DateTime)` - If the input string can be parsed as a time +/// * `Err(ParseDurationError)` - If the input string cannot be parsed as a datetime +/// +/// # Errors +/// +/// This function will return `Err(ParseDurationError::InvalidInput)` if the input string +/// cannot be parsed as a datetime. +/// +pub fn from_str + Clone>(s: S) -> Result, ParseDurationError> { + // TODO: Replace with a proper customisable parsing solution using `nom`, `grmtools`, or + // similar + + // Formats with offsets don't require NaiveDateTime workaround + for fmt in [ + format::YYYYMMDDHHMM_OFFSET, + format::YYYYMMDDHHMM_HYPHENATED_OFFSET, + format::YYYYMMDDHHMM_UTC_OFFSET, + format::YYYYMMDDHHMM_ZULU_OFFSET, + ] { + if let Ok(parsed) = DateTime::parse_from_str(s.as_ref(), fmt) { + return Ok(parsed); + } + } + + // Parse formats with no offset, assume local time + for fmt in [ + format::YYYYMMDDHHMMS_T_SEP, + format::YYYYMMDDHHMM, + format::YYYYMMDDHHMMS, + format::YYYYMMDDHHMMSS, + format::YYYY_MM_DD_HH_MM, + format::YYYYMMDDHHMM_DOT_SS, + format::POSIX_LOCALE, + ] { + if let Ok(parsed) = NaiveDateTime::parse_from_str(s.as_ref(), fmt) { + if let Ok(dt) = naive_dt_to_fixed_offset(parsed) { + return Ok(dt); + } + } + } + + // Parse epoch seconds + if s.as_ref().bytes().next() == Some(b'@') { + if let Ok(parsed) = NaiveDateTime::parse_from_str(&s.as_ref()[1..], "%s") { + if let Ok(dt) = naive_dt_to_fixed_offset(parsed) { + return Ok(dt); + } + } + } + + let ts = s.as_ref().to_owned() + "0000"; + // Parse date only formats - assume 
midnight local timezone + for fmt in [format::ISO_8601, format::ISO_8601_NO_SEP] { + let f = fmt.to_owned() + "%H%M"; + if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts, &f) { + if let Ok(dt) = naive_dt_to_fixed_offset(parsed) { + return Ok(dt); + } + } + } + + // Parse offsets. chrono doesn't provide any functionality to parse + // offsets, so instead we replicate parse_date behaviour by getting + // the current date with local, and create a date time string at midnight, + // before trying offset suffixes + let local = Local::now(); + let ts = format!("{}", local.format("%Y%m%d")) + "0000" + s.as_ref(); + for fmt in [format::UTC_OFFSET, format::ZULU_OFFSET] { + let f = format::YYYYMMDDHHMM.to_owned() + fmt; + if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) { + return Ok(parsed); + } + } + + // Default parse and failure + s.as_ref() + .parse() + .map_err(|_| (ParseDurationError::InvalidInput)) +} + +// Convert NaiveDateTime to DateTime by assuming the offset +// is local time +fn naive_dt_to_fixed_offset(dt: NaiveDateTime) -> Result, ()> { + let now = Local::now(); + match now.offset().from_local_datetime(&dt) { + LocalResult::Single(dt) => Ok(dt), + _ => Err(()), + } +} + +#[cfg(test)] +mod tests { + static TEST_TIME: i64 = 1613371067; + + #[cfg(test)] + mod iso_8601 { + use std::env; + + use crate::{parse_datetime::from_str, parse_datetime::tests::TEST_TIME}; + + #[test] + fn test_t_sep() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-15T06:37:47"; + let actual = from_str(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + + #[test] + fn test_space_sep() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-15 06:37:47"; + let actual = from_str(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + + #[test] + fn test_space_sep_offset() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-14 22:37:47 -0800"; + let actual = from_str(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + + #[test] + fn test_t_sep_offset() 
{ + env::set_var("TZ", "UTC"); + let dt = "2021-02-14T22:37:47 -0800"; + let actual = from_str(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + } + + #[cfg(test)] + mod offsets { + use chrono::Local; + + use crate::parse_datetime::from_str; + + #[test] + fn test_positive_offsets() { + let offsets = vec![ + "UTC+07:00", + "UTC+0700", + "UTC+07", + "Z+07:00", + "Z+0700", + "Z+07", + ]; + + let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0700"); + for offset in offsets { + let actual = from_str(offset).unwrap(); + assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); + } + } + + #[test] + fn test_partial_offset() { + let offsets = vec!["UTC+00:15", "UTC+0015", "Z+00:15", "Z+0015"]; + let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0015"); + for offset in offsets { + let actual = from_str(offset).unwrap(); + assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); + } + } + } +} From c177117428385de656962743c90b4d71321d86d1 Mon Sep 17 00:00:00 2001 From: Ben Schofield Date: Mon, 5 Jun 2023 07:58:44 -0700 Subject: [PATCH 2/6] Fixes for comments --- src/parse_datetime.rs | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs index 5e031ac..eae6ab8 100644 --- a/src/parse_datetime.rs +++ b/src/parse_datetime.rs @@ -1,9 +1,12 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + use chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone}; use crate::ParseDurationError; /// Formats that parse input can take. 
-/// Taken from `touch` core util +/// Taken from `touch` coreutil mod format { pub(crate) const ISO_8601: &str = "%Y-%m-%d"; pub(crate) const ISO_8601_NO_SEP: &str = "%Y%m%d"; @@ -32,7 +35,7 @@ mod format { /// # Examples /// /// ``` -/// use chrono::{DateTime, Utc}; +/// use chrono::{DateTime, Utc, TimeZone}; /// let time = humantime_to_duration::parse_datetime::from_str("2023-06-03 12:00:01Z"); /// assert_eq!(time.unwrap(), Utc.with_ymd_and_hms(2023, 06, 03, 12, 00, 01).unwrap()); /// ``` @@ -144,7 +147,7 @@ mod tests { mod iso_8601 { use std::env; - use crate::{parse_datetime::from_str, parse_datetime::tests::TEST_TIME}; + use crate::{parse_datetime::from_str, parse_datetime::tests::TEST_TIME, ParseDurationError}; #[test] fn test_t_sep() { @@ -177,13 +180,21 @@ mod tests { let actual = from_str(dt); assert_eq!(actual.unwrap().timestamp(), TEST_TIME); } + + #[test] + fn invalid_formats() { + let invalid_dts = vec!["NotADate", "202104", "202104-12T22:37:47"]; + for dt in invalid_dts { + assert_eq!(from_str(dt), Err(ParseDurationError::InvalidInput)); + } + } } #[cfg(test)] mod offsets { use chrono::Local; - use crate::parse_datetime::from_str; + use crate::{parse_datetime::from_str, ParseDurationError}; #[test] fn test_positive_offsets() { @@ -212,5 +223,13 @@ mod tests { assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); } } + + #[test] + fn invalid_offset_format() { + let invalid_offsets = vec!["+0700", "UTC+2", "Z-1", "UTC+01005"]; + for offset in invalid_offsets { + assert_eq!(from_str(offset), Err(ParseDurationError::InvalidInput)); + } + } } } From 8d797fcbcbe3c31aee0ac3cd3fb06a6bb3ce9d0f Mon Sep 17 00:00:00 2001 From: Ben Schofield Date: Mon, 5 Jun 2023 08:00:07 -0700 Subject: [PATCH 3/6] typo fix --- src/parse_datetime.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs index eae6ab8..d3952a8 100644 --- a/src/parse_datetime.rs +++ b/src/parse_datetime.rs @@ -6,7 +6,7 @@ use 
chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone} use crate::ParseDurationError; /// Formats that parse input can take. -/// Taken from `touch` coreutil +/// Taken from `touch` coreutils mod format { pub(crate) const ISO_8601: &str = "%Y-%m-%d"; pub(crate) const ISO_8601_NO_SEP: &str = "%Y%m%d"; From 29e7a5c335767b879a538570d3a5f4f3ddb1102b Mon Sep 17 00:00:00 2001 From: Ben Schofield <47790940+Benjscho@users.noreply.github.com> Date: Mon, 5 Jun 2023 08:04:22 -0700 Subject: [PATCH 4/6] Update src/parse_datetime.rs Co-authored-by: Sylvestre Ledru --- src/parse_datetime.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs index d3952a8..47094ee 100644 --- a/src/parse_datetime.rs +++ b/src/parse_datetime.rs @@ -147,7 +147,9 @@ mod tests { mod iso_8601 { use std::env; - use crate::{parse_datetime::from_str, parse_datetime::tests::TEST_TIME, ParseDurationError}; + use crate::{ + parse_datetime::from_str, parse_datetime::tests::TEST_TIME, ParseDurationError, + }; #[test] fn test_t_sep() { From bc33770075dcc952dc0fac1ff734648c9d449bc4 Mon Sep 17 00:00:00 2001 From: Ben Schofield Date: Tue, 6 Jun 2023 10:05:14 -0700 Subject: [PATCH 5/6] Update README Update the README and add a test module to parse_datetime for any examples presented in the README. --- README.md | 22 ++++++++++++++++++++-- src/parse_datetime.rs | 13 +++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d832441..8a2267f 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,11 @@ [![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/uutils/humantime_to_duration/blob/main/LICENSE) [![CodeCov](https://codecov.io/gh/uutils/humantime_to_duration/branch/main/graph/badge.svg)](https://codecov.io/gh/uutils/humantime_to_duration) -A Rust crate for parsing human-readable relative time strings and converting them to a `Duration`. 
+A Rust crate for parsing human-readable relative time strings and converting them to a `Duration`, or parsing human-readable datetime strings and converting them to a `DateTime`. ## Features -- Parses a variety of human-readable time formats. +- Parses a variety of human-readable and standard time formats. - Supports positive and negative durations. - Allows for chaining time units (e.g., "1 hour 2 minutes" or "2 days and 2 hours"). - Calculate durations relative to a specified date. @@ -39,6 +39,15 @@ assert_eq!( ); ``` +For DateTime parsing, import the `parse_datetime` module: +``` +use humantime_to_duration::parse_datetime::from_str; +use chrono::{Local, TimeZone}; + +let dt = from_str("2021-02-14 06:37:47"); +assert_eq!(dt.unwrap(), Local.with_ymd_and_hms(2021, 2, 14, 6, 37, 47).unwrap()); +``` + ### Supported Formats The `from_str` and `from_str_at_date` functions support the following formats for relative time: @@ -56,6 +65,8 @@ The `from_str` and `from_str_at_date` functions support the following formats fo ## Return Values +### Duration + The `from_str` and `from_str_at_date` functions return: - `Ok(Duration)` - If the input string can be parsed as a relative time @@ -64,6 +75,13 @@ The `from_str` and `from_str_at_date` functions return: This function will return `Err(ParseDurationError::InvalidInput)` if the input string cannot be parsed as a relative time. +### parse_datetime + +The `from_str` function returns: + +- `Ok(DateTime)` - If the input string can be parsed as a datetime +- `Err(ParseDurationError::InvalidInput)` - If the input string cannot be parsed + ## Fuzzer To run the fuzzer: diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs index 47094ee..76bc719 100644 --- a/src/parse_datetime.rs +++ b/src/parse_datetime.rs @@ -234,4 +234,17 @@ mod tests { } } } + + /// Used to test example code presented in the README. 
+ mod readme_test { + use crate::parse_datetime::from_str; + use chrono::{TimeZone, Local}; + + #[test] + fn test_readme_code() { + let dt = from_str("2021-02-14 06:37:47"); + assert_eq!(dt.unwrap(), Local.with_ymd_and_hms(2021, 2, 14, 6, 37, 47).unwrap()); + } + + } } From 7ee33d1fb1191766b134293621ebb4b31903fba1 Mon Sep 17 00:00:00 2001 From: Ben Schofield Date: Tue, 6 Jun 2023 10:26:05 -0700 Subject: [PATCH 6/6] cargo fmt --- src/parse_datetime.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs index 76bc719..fcf5fae 100644 --- a/src/parse_datetime.rs +++ b/src/parse_datetime.rs @@ -234,17 +234,19 @@ mod tests { } } } - + /// Used to test example code presented in the README. mod readme_test { use crate::parse_datetime::from_str; - use chrono::{TimeZone, Local}; + use chrono::{Local, TimeZone}; #[test] fn test_readme_code() { let dt = from_str("2021-02-14 06:37:47"); - assert_eq!(dt.unwrap(), Local.with_ymd_and_hms(2021, 2, 14, 6, 37, 47).unwrap()); + assert_eq!( + dt.unwrap(), + Local.with_ymd_and_hms(2021, 2, 14, 6, 37, 47).unwrap() + ); } - } }