From cbb71b142d601f8e08a48b61a37e7a4f15a6f56f Mon Sep 17 00:00:00 2001 From: Ben Schofield Date: Mon, 29 May 2023 16:50:29 -0700 Subject: [PATCH] uucore: add datetime parser Add a relaxed datetime parser to uucore for use in the `date` utility (and potentially others). This datetime parser functions by using `chrono`'s own parsing utilities and a try/succeed approach to parsing. This commit adds `date -d` functionality to the `date` utility by enabling parsing of a number of date formats. This implementation of the datetime parser has some drawbacks and some positives. On the positive side: - it was easy to implement - it is easy to add more datetime formats To add further supported formats, a developer can add the required format string to the `format` mod in `parse_datetime.rs`, and then add it as a potential format to the relevant format list in `parse_datetime`. On the negative: - It is not easily customisable beyond the supported `chrono` parsing formats. E.g., `chrono` does not currently support parsing offsets without trailing zeros. `date -d "UTC+1"` should return a valid response, but `chrono` fails to parse this. - Because it is an attempt-driven parser, it is likely not that performant. I have not done any performance testing as part of this change, but I would expect a custom parser to perform much better. 
--- Cargo.lock | 1 + src/uu/date/src/date.rs | 4 +- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/parser.rs | 1 + src/uucore/src/lib/parser/parse_datetime.rs | 177 ++++++++++++++++++++ tests/by-util/test_date.rs | 23 +++ 7 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 src/uucore/src/lib/parser/parse_datetime.rs diff --git a/Cargo.lock b/Cargo.lock index ffb99826649..da7944d8898 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3395,6 +3395,7 @@ version = "0.0.18" dependencies = [ "blake2b_simd", "blake3", + "chrono", "clap", "data-encoding", "data-encoding-macro", diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 381619f06fb..65aa0631a0a 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -23,6 +23,7 @@ use uucore::display::Quotable; #[cfg(not(any(target_os = "redox")))] use uucore::error::FromIo; use uucore::error::{UResult, USimpleError}; +use uucore::parse_datetime::parse_datetime; use uucore::{format_usage, help_about, help_usage, show}; #[cfg(windows)] use windows_sys::Win32::{Foundation::SYSTEMTIME, System::SystemInformation::SetSystemTime}; @@ -398,8 +399,7 @@ fn make_format_string(settings: &Settings) -> &str { fn parse_date + Clone>( s: S, ) -> Result, (String, chrono::format::ParseError)> { - // TODO: The GNU date command can parse a wide variety of inputs. 
- s.as_ref().parse().map_err(|e| (s.as_ref().into(), e)) + parse_datetime(s.as_ref()) } #[cfg(not(any(unix, windows)))] diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index ea7fb44f3c0..b0f19039d2c 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -19,6 +19,7 @@ path="src/lib/lib.rs" [dependencies] clap = { workspace=true } +chrono = { workspace=true } uucore_procs = { workspace=true } dns-lookup = { version="1.0.8", optional=true } dunce = "1.0.4" diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index e76e540c8d8..ff1ca04b3c5 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -30,6 +30,7 @@ pub use crate::mods::update_control; pub use crate::mods::version_cmp; // * string parsing modules +pub use crate::parser::parse_datetime; pub use crate::parser::parse_glob; pub use crate::parser::parse_size; pub use crate::parser::parse_time; diff --git a/src/uucore/src/lib/parser.rs b/src/uucore/src/lib/parser.rs index 8eae16bbf28..76baccd9bf6 100644 --- a/src/uucore/src/lib/parser.rs +++ b/src/uucore/src/lib/parser.rs @@ -1,3 +1,4 @@ +pub mod parse_datetime; pub mod parse_glob; pub mod parse_size; pub mod parse_time; diff --git a/src/uucore/src/lib/parser/parse_datetime.rs b/src/uucore/src/lib/parser/parse_datetime.rs new file mode 100644 index 00000000000..990bc3fd1b5 --- /dev/null +++ b/src/uucore/src/lib/parser/parse_datetime.rs @@ -0,0 +1,177 @@ +use chrono::{DateTime, FixedOffset, Local, NaiveDateTime, TimeZone}; + +/// Formats that parse input can take. 
+/// Taken from `touch` core util +mod format { + pub(crate) const ISO_8601: &str = "%Y-%m-%d"; + pub(crate) const POSIX_LOCALE: &str = "%a %b %e %H:%M:%S %Y"; + pub(crate) const YYYYMMDDHHMM_DOT_SS: &str = "%Y%m%d%H%M.%S"; + pub(crate) const YYYYMMDDHHMMSS: &str = "%Y-%m-%d %H:%M:%S.%f"; + pub(crate) const YYYYMMDDHHMMS: &str = "%Y-%m-%d %H:%M:%S"; + pub(crate) const YYYY_MM_DD_HH_MM: &str = "%Y-%m-%d %H:%M"; + pub(crate) const YYYYMMDDHHMM: &str = "%Y%m%d%H%M"; + pub(crate) const YYYYMMDDHHMM_OFFSET: &str = "%Y%m%d%H%M %z"; + pub(crate) const YYYYMMDDHHMM_UTC_OFFSET: &str = "%Y%m%d%H%MUTC%z"; + pub(crate) const YYYYMMDDHHMM_ZULU_OFFSET: &str = "%Y%m%d%H%MZ%z"; + pub(crate) const YYYYMMDDHHMM_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M %z"; + pub(crate) const ISO_T_SEP: &str = "%Y-%m-%dT%H:%M:%S"; + pub(crate) const UTC_OFFSET: &str = "UTC%#z"; + pub(crate) const ZULU_OFFSET: &str = "Z%#z"; +} + +/// Parse a `String` into a `DateTime`. +/// If it fails, return a tuple of the `String` along with its `ParseError`. +/// +/// The purpose of this function is to provide a basic loose DateTime parser. 
+pub fn parse_datetime + Clone>( + s: S, +) -> Result, (String, chrono::format::ParseError)> { + // TODO: Replace with a proper customiseable parsing solution using `nom`, `grmtools`, or + // similar + + // Formats with offsets don't require NaiveDateTime workaround + for fmt in [ + format::YYYYMMDDHHMM_OFFSET, + format::YYYYMMDDHHMM_HYPHENATED_OFFSET, + format::YYYYMMDDHHMM_UTC_OFFSET, + format::YYYYMMDDHHMM_ZULU_OFFSET, + ] { + if let Ok(parsed) = DateTime::parse_from_str(s.as_ref(), fmt) { + return Ok(parsed); + } + } + + // Parse formats with no offset, assume local time + for fmt in [ + format::ISO_T_SEP, + format::YYYYMMDDHHMM, + format::YYYYMMDDHHMMS, + format::YYYYMMDDHHMMSS, + format::YYYY_MM_DD_HH_MM, + format::YYYYMMDDHHMM_DOT_SS, + format::POSIX_LOCALE, + ] { + if let Ok(parsed) = NaiveDateTime::parse_from_str(s.as_ref(), fmt) { + return Ok(naive_dt_to_fixed_offset(parsed)); + } + } + + // Parse epoch seconds + if s.as_ref().bytes().next() == Some(b'@') { + if let Ok(parsed) = NaiveDateTime::parse_from_str(&s.as_ref()[1..], "%s") { + return Ok(naive_dt_to_fixed_offset(parsed)); + } + } + + let ts = s.as_ref().to_owned() + "0000"; + // Parse date only formats - assume midnight local timezone + for fmt in [format::ISO_8601] { + let f = fmt.to_owned() + "%H%M"; + if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts, &f) { + return Ok(naive_dt_to_fixed_offset(parsed)); + } + } + + // Parse offsets. 
chrono doesn't provide any functionality to parse + // offsets, so instead we replicate parse_date behaviour by getting + // the current date with local, and create a date time string at midnight, + // before trying offset suffixes + let local = Local::now(); + let ts = format!("{}", local.format("%Y%m%d")) + "0000" + s.as_ref(); + for fmt in [format::UTC_OFFSET, format::ZULU_OFFSET] { + let f = format::YYYYMMDDHHMM.to_owned() + fmt; + if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) { + return Ok(parsed); + } + } + + // Default parse and failure + s.as_ref().parse().map_err(|e| (s.as_ref().into(), e)) +} + +// Convert NaiveDateTime to DateTime by assuming the offset +// is local time +fn naive_dt_to_fixed_offset(dt: NaiveDateTime) -> DateTime { + let now = Local::now(); + now.with_timezone(now.offset()); + now.offset().from_local_datetime(&dt).unwrap().into() +} + +#[cfg(test)] +mod tests { + static TEST_TIME: i64 = 1613371067; + + #[cfg(test)] + mod iso_8601 { + use std::env; + + use crate::{parse_datetime::parse_datetime, parse_datetime::tests::TEST_TIME}; + + #[test] + fn test_t_sep() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-15T06:37:47"; + let actual = parse_datetime(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + + #[test] + fn test_space_sep() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-15 06:37:47"; + let actual = parse_datetime(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + + #[test] + fn test_space_sep_offset() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-14 22:37:47 -0800"; + let actual = parse_datetime(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + + #[test] + fn test_t_sep_offset() { + env::set_var("TZ", "UTC"); + let dt = "2021-02-14T22:37:47 -0800"; + let actual = parse_datetime(dt); + assert_eq!(actual.unwrap().timestamp(), TEST_TIME); + } + } + + #[cfg(test)] + mod offsets { + use chrono::Local; + + use crate::parse_datetime::parse_datetime; + + #[test] + fn 
test_positive_offsets() { + let offsets = vec![ + "UTC+07:00", + "UTC+0700", + "UTC+07", + "Z+07:00", + "Z+0700", + "Z+07", + ]; + + let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0700"); + for offset in offsets { + let actual = parse_datetime(offset).unwrap(); + assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); + } + } + + #[test] + fn test_partial_offset() { + let offsets = vec!["UTC+00:15", "UTC+0015", "Z+00:15", "Z+0015"]; + let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0015"); + for offset in offsets { + let actual = parse_datetime(offset).unwrap(); + assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); + } + } + } +} diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index c8c33aa8900..49745bed8ef 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -395,3 +395,26 @@ fn test_invalid_date_string() { .no_stdout() .stderr_contains("invalid date"); } + +#[test] +fn test_date_parse_iso8601() { + let dates = vec![ + "2023-03-27 08:30:00", + "2023-04-01 12:00:00", + "2023-04-15 18:30:00", + ]; + for date in dates { + new_ucmd!().arg("-d").arg(date).succeeds(); + } +} + +#[test] +fn test_date_parse_epoch() { + let date = "@2147483647"; + new_ucmd!() + .arg("-u") + .arg("-d") + .arg(date) + .succeeds() + .stdout_is("Tue Jan 19 03:14:07 2038\n"); +}