diff --git a/Cargo.lock b/Cargo.lock
index 240cd7886a7..4a3b480655a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1315,9 +1315,9 @@ dependencies = [
 
 [[package]]
 name = "lazy_static"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
 [[package]]
 name = "lazycell"
@@ -2646,8 +2646,10 @@ version = "0.0.27"
 dependencies = [
  "chrono",
  "clap",
+ "lazy_static",
  "libc",
  "parse_datetime",
+ "regex",
  "uucore",
  "windows-sys 0.48.0",
 ]
@@ -3672,7 +3674,7 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
diff --git a/src/uu/date/Cargo.toml b/src/uu/date/Cargo.toml
index 448a683a8a2..278c9708611 100644
--- a/src/uu/date/Cargo.toml
+++ b/src/uu/date/Cargo.toml
@@ -22,6 +22,8 @@ chrono = { workspace = true }
 clap = { workspace = true }
 uucore = { workspace = true }
 parse_datetime = { workspace = true }
+regex = { workspace = true }
+lazy_static = "1.5.0"
 
 [target.'cfg(unix)'.dependencies]
 libc = { workspace = true }
diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs
index 02737dca28e..750189bba8e 100644
--- a/src/uu/date/src/date.rs
+++ b/src/uu/date/src/date.rs
@@ -24,6 +24,8 @@ use windows_sys::Win32::{Foundation::SYSTEMTIME, System::SystemInformation::SetS
 
 use uucore::shortcut_value_parser::ShortcutValueParser;
 
+mod parser;
+
 // Options
 const DATE: &str = "date";
 const HOURS: &str = "hours";
@@ -222,7 +224,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     // Iterate over all dates - whether it's a single date or a file.
     let dates: Box<dyn Iterator<Item = _>> = match settings.date_source {
         DateSource::Custom(ref input) => {
-            let date = parse_date(input.clone());
+            let date = parse_date(input.clone())
+                // fallback to parser::parse_fb if parse_date fails
+                .or_else(|_| parser::parse_fb(input, now).map_err(|(i, e)| (i.to_string(), e)));
             let iter = std::iter::once(date);
             Box::new(iter)
         }
diff --git a/src/uu/date/src/parser.rs b/src/uu/date/src/parser.rs
new file mode 100644
index 00000000000..677f3b13609
--- /dev/null
+++ b/src/uu/date/src/parser.rs
@@ -0,0 +1,246 @@
+//! Fallback parser for `date --date` strings that `parse_date` rejects.
+//!
+//! Two input shapes are understood:
+//! * `<Mon> <day> <HH:MM[:SS]> [<year>] ...`, e.g. `Jul 18 06:14:49 2024 GMT`;
+//! * whitespace separated `YYYY-MM-DD`, `HH:MM[:SS]` and `<count> [<unit>]`
+//!   tokens, applied from left to right to a starting point.
+
+use chrono::offset::TimeZone;
+use chrono::{DateTime, Datelike, FixedOffset, Local, TimeDelta, Timelike};
+
+use lazy_static::lazy_static;
+use regex::{Captures, Regex};
+
+/// English month abbreviations; position plus one is the month number.
+const MONTH_ABBREVIATIONS: [&str; 12] = [
+    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+];
+
+/// Calendar fields recognised in the input.
+#[derive(Debug)]
+enum Token {
+    /// Year, month, day.
+    Ymd(u32, u32, u32),
+    /// Hour, minute, second.
+    Hms(u32, u32, u32),
+    /// Year, month, day, hour, minute, second.
+    Ymdhms(u32, u32, u32, u32, u32, u32),
+}
+
+/// Unit that may follow a count, e.g. the `day` in `3 day`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum DateUnit {
+    Second,
+    Minute,
+    Hour,
+    Day,
+    Week,
+    Month,
+    Year,
+}
+
+impl DateUnit {
+    /// Recognises `second`, `minute`, `hour`, `day`, `week`, `month` and
+    /// `year`, each with an optional trailing `s`.
+    fn parse(token: &str) -> Option<Self> {
+        match token.strip_suffix('s').unwrap_or(token) {
+            "second" => Some(Self::Second),
+            "minute" => Some(Self::Minute),
+            "hour" => Some(Self::Hour),
+            "day" => Some(Self::Day),
+            "week" => Some(Self::Week),
+            "month" => Some(Self::Month),
+            "year" => Some(Self::Year),
+            _ => None,
+        }
+    }
+
+    /// Returns the span covered by `count` of this unit, or `None` when it
+    /// does not fit into a `TimeDelta`.
+    ///
+    /// Months and years are approximated as 30 and 365 days.
+    fn delta(self, count: i64) -> Option<TimeDelta> {
+        match self {
+            Self::Second => TimeDelta::try_seconds(count),
+            Self::Minute => TimeDelta::try_minutes(count),
+            Self::Hour => TimeDelta::try_hours(count),
+            Self::Day => TimeDelta::try_days(count),
+            Self::Week => TimeDelta::try_weeks(count),
+            Self::Month => TimeDelta::try_days(count.checked_mul(30)?),
+            Self::Year => TimeDelta::try_days(count.checked_mul(365)?),
+        }
+    }
+}
+
+/// Parses the capture group `name` of `caps` as a `u32`.
+fn group_u32(caps: &Captures<'_>, name: &str) -> Option<u32> {
+    caps.name(name)?.as_str().parse().ok()
+}
+
+/// Builds a `DateTime` in the `Local` time zone from calendar fields.
+///
+/// Returns an error instead of panicking when the fields do not name a valid
+/// local time (e.g. month 13 or hour 25). An ambiguous local time resolves to
+/// its earliest instant.
+fn local_datetime(
+    year: i32,
+    month: u32,
+    day: u32,
+    hour: u32,
+    minute: u32,
+    second: u32,
+) -> Result<DateTime<FixedOffset>, String> {
+    Local
+        .with_ymd_and_hms(year, month, day, hour, minute, second)
+        .earliest()
+        .map(DateTime::<FixedOffset>::from)
+        .ok_or_else(|| {
+            format!(
+                "Error parsing date, invalid date or time \
+                 {year}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}"
+            )
+        })
+}
+
+impl Token {
+    /// Parses a `YYYY-MM-DD` token.
+    fn parse_ymd(token: &str) -> Option<Self> {
+        lazy_static! {
+            static ref YMD_REGEX: Regex =
+                Regex::new(r"^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$").unwrap();
+        }
+        let caps = YMD_REGEX.captures(token)?;
+        Some(Self::Ymd(
+            group_u32(&caps, "year")?,
+            group_u32(&caps, "month")?,
+            group_u32(&caps, "day")?,
+        ))
+    }
+
+    /// Maps a token that starts with an English three-letter month
+    /// abbreviation (`Jan` to `Dec`) to its month number, 1 to 12.
+    fn parse_month_name(token: &str) -> Option<u32> {
+        let prefix = token.get(..3)?;
+        MONTH_ABBREVIATIONS
+            .iter()
+            .zip(1u32..)
+            .find_map(|(&name, number)| (name == prefix).then_some(number))
+    }
+
+    /// Parses an `HH:MM` or `HH:MM:SS` token; missing seconds default to 0.
+    fn parse_hms(token: &str) -> Option<Self> {
+        lazy_static! {
+            static ref HMS_REGEX: Regex =
+                Regex::new(r"^(?<hour>\d{2}):(?<minute>\d{2})(?::(?<second>\d{2}))?$").unwrap();
+        }
+        let caps = HMS_REGEX.captures(token)?;
+        let second = match caps.name("second") {
+            Some(_) => group_u32(&caps, "second")?,
+            None => 0,
+        };
+        Some(Self::Hms(
+            group_u32(&caps, "hour")?,
+            group_u32(&caps, "minute")?,
+            second,
+        ))
+    }
+
+    /// Parses an optionally signed decimal number of at most nine digits.
+    fn parse_number_i32(token: &str) -> Option<i32> {
+        lazy_static! {
+            static ref NUMBER_REGEX: Regex = Regex::new(r"^[+-]?\d{1,9}$").unwrap();
+        }
+        if NUMBER_REGEX.is_match(token) {
+            token.parse().ok()
+        } else {
+            None
+        }
+    }
+
+    /// Parses the `<Mon> <day> <HH:MM[:SS]> [<year>]` shape, e.g.
+    /// `Jul 18 06:14:49 2024 GMT`.
+    ///
+    /// A missing or non-numeric year falls back to the year of `d`. Tokens
+    /// after the year position, such as a time zone name, are ignored.
+    fn parse_with_month(input: &str, d: &DateTime<FixedOffset>) -> Option<Self> {
+        let mut tokens = input.split_whitespace();
+        let month = Self::parse_month_name(tokens.next()?)?;
+        let day = u32::try_from(Self::parse_number_i32(tokens.next()?)?).ok()?;
+        let hms = Self::parse_hms(tokens.next()?)?;
+        let year = tokens
+            .next()
+            .and_then(Self::parse_number_i32)
+            .unwrap_or(d.year());
+        // @TODO: Parse the timezone
+        match hms {
+            Self::Hms(hour, minute, second) => Some(Self::Ymdhms(
+                u32::try_from(year).ok()?,
+                month,
+                day,
+                hour,
+                minute,
+                second,
+            )),
+            Self::Ymd(..) | Self::Ymdhms(..) => None,
+        }
+    }
+
+    /// Applies `input` to the starting point `d` and returns the result.
+    ///
+    /// The month-name shape is tried first. Otherwise the tokens are applied
+    /// from left to right: `YYYY-MM-DD` replaces the date, `HH:MM[:SS]`
+    /// replaces the time of day, and `<count> [<unit>]` shifts the value, the
+    /// unit defaulting to hours when the next token is not a unit.
+    ///
+    /// NOTE(review): replaced dates and times are built in the `Local` zone,
+    /// whatever offset `d` carried - confirm this is intended for `-u`.
+    fn parse(input: &str, mut d: DateTime<FixedOffset>) -> Result<DateTime<FixedOffset>, String> {
+        // Parsing "Jul 18 06:14:49 2024 GMT" like dates
+        if let Some(Self::Ymdhms(year, mo, day, h, m, s)) = Self::parse_with_month(input, &d) {
+            let year = i32::try_from(year).map_err(|e| e.to_string())?;
+            return local_datetime(year, mo, day, h, m, s);
+        }
+
+        let mut tokens = input.split_whitespace().peekable();
+        while let Some(token) = tokens.next() {
+            if let Some(Self::Ymd(year, mo, day)) = Self::parse_ymd(token) {
+                let year = i32::try_from(year).map_err(|e| e.to_string())?;
+                d = local_datetime(year, mo, day, d.hour(), d.minute(), d.second())?;
+            } else if let Some(Self::Hms(h, mi, s)) = Self::parse_hms(token) {
+                d = local_datetime(d.year(), d.month(), d.day(), h, mi, s)?;
+            } else if let Some(number) = Self::parse_number_i32(token) {
+                // Only consume the following token when it really is a unit,
+                // so that e.g. the time in "1 00:00" is not swallowed.
+                let unit = match tokens.peek().and_then(|next| DateUnit::parse(next)) {
+                    Some(unit) => {
+                        tokens.next();
+                        unit
+                    }
+                    None => DateUnit::Hour,
+                };
+                d = unit
+                    .delta(i64::from(number))
+                    .and_then(|delta| d.checked_add_signed(delta))
+                    .ok_or_else(|| format!("Error parsing date, {token} is out of range"))?;
+            } else {
+                // Don't know how to parse this
+                return Err(format!("Error parsing date, unexpected token {token}"));
+            }
+        }
+
+        Ok(d)
+    }
+}
+
+/// Fallback date parser: applies `input` to the starting point `d`.
+///
+/// # Errors
+///
+/// Returns `input` together with `ParseDateTimeError::InvalidInput` when
+/// `input` cannot be parsed.
+pub fn parse_fb(
+    input: &str,
+    d: DateTime<FixedOffset>,
+) -> Result<DateTime<FixedOffset>, (&str, parse_datetime::ParseDateTimeError)> {
+    Token::parse(input, d).map_err(|_| (input, parse_datetime::ParseDateTimeError::InvalidInput))
+}
diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs
index 553414af853..e7656af505a 100644
--- a/tests/by-util/test_date.rs
+++ b/tests/by-util/test_date.rs
@@ -420,6 +420,39 @@ fn test_invalid_date_string() {
         .stderr_contains("invalid date");
 }
 
+#[test]
+fn test_invalid_date_fallback() {
+    new_ucmd!()
+        .arg("-u")
+        .arg("-d")
+        .arg("11111111")
+        .succeeds()
+        // how coreutils outputs
+        //.stdout_contains("Sat Nov 11 12:00:00 AM UTC 1111");
+        .stdout_contains("Sat Nov 11 00:00:00 1111");
+
+    new_ucmd!()
+        .arg("-u")
+        .arg("-d")
+        .arg("2024-01-01 +351 day 00:00")
+        .succeeds()
+        .stdout_contains("Tue Dec 17 00:00:00 2024");
+
+    new_ucmd!()
+        .arg("-u")
+        .arg("-d")
+        .arg("2024-01-01 00:00 1 day 1 hour 1 minute 3 second")
+        .succeeds()
+        .stdout_contains("Tue Jan 2 01:01:03 2024");
+
+    new_ucmd!()
+        .arg("-d")
+        .arg("Jul 18 06:14:49 2024 GMT")
+        .succeeds()
+        //.stdout_contains("Jul 18 06:14:49 2024 GMT");
+        .stdout_contains("Jul 18 06:14:49 2024");
+}
+
 #[test]
 fn test_date_one_digit_date() {
     new_ucmd!()