From 3d6f79d82604473fbccf81490c2de311eaf9bbe1 Mon Sep 17 00:00:00 2001 From: yuankunzhang Date: Tue, 22 Jul 2025 15:11:35 +0000 Subject: [PATCH] feat: improve the literal date parsers --- src/items/date.rs | 186 ++++++++++++++++++++++++++++++++++++++-------- src/items/mod.rs | 21 +++++- 2 files changed, 174 insertions(+), 33 deletions(-) diff --git a/src/items/date.rs b/src/items/date.rs index 4b17081..8f5f542 100644 --- a/src/items/date.rs +++ b/src/items/date.rs @@ -27,17 +27,15 @@ //! > ‘September’. use winnow::{ - ascii::alpha1, - combinator::{alt, opt, preceded, trace}, + ascii::{alpha1, multispace1}, + combinator::{alt, eof, opt, preceded, terminated, trace}, error::ErrMode, - seq, stream::AsChar, token::take_while, ModalResult, Parser, }; use super::primitive::{ctx_err, dec_uint, s}; -use crate::ParseDateTimeError; #[derive(PartialEq, Eq, Clone, Debug, Default)] pub struct Date { @@ -220,27 +218,58 @@ fn us(input: &mut &str) -> ModalResult { } } -/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", "14-nov2022", "14nov-2022" +/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", +/// "14-nov2022", "14nov-2022". fn literal1(input: &mut &str) -> ModalResult { - seq!(Date { - day: day, - _: opt(s('-')), - month: literal_month, - year: opt(preceded(opt(s('-')), year)), - }) - .parse_next(input) + let (day, _, month, year) = ( + s(dec_uint), + opt(s('-')), + s(literal_month), + opt(terminated( + preceded(opt(s('-')), s(take_while(1.., AsChar::is_dec_digit))), + // The year must be followed by a space or end of input. + alt((multispace1, eof)), + )), + ) + .parse_next(input)?; + + match year { + Some(year) => (year, month, day) + .try_into() + .map_err(|e| ErrMode::Cut(ctx_err(e))), + None => (month, day) + .try_into() + .map_err(|e| ErrMode::Cut(ctx_err(e))), + } } -/// Parse `November 14, 2022` and `Nov 14, 2022` +/// Parse `November 14, 2022`, `Nov 14, 2022`, and `Nov 14 2022`. 
fn literal2(input: &mut &str) -> ModalResult { - seq!(Date { - month: literal_month, - day: day, - // FIXME: GNU requires _some_ space between the day and the year, - // probably to distinguish with floats. - year: opt(preceded(s(","), year)), - }) - .parse_next(input) + let (month, day, year) = ( + s(literal_month), + s(dec_uint), + opt(terminated( + preceded( + // GNU quirk: for formats like `Nov 14, 2022`, there must be some + // space between the comma and the year. This is probably to + // distinguish from floats. + opt(s(terminated(',', multispace1))), + s(take_while(1.., AsChar::is_dec_digit)), + ), + // The year must be followed by a space or end of input. + alt((multispace1, eof)), + )), + ) + .parse_next(input)?; + + match year { + Some(year) => (year, month, day) + .try_into() + .map_err(|e| ErrMode::Cut(ctx_err(e))), + None => (month, day) + .try_into() + .map_err(|e| ErrMode::Cut(ctx_err(e))), + } } pub fn year(input: &mut &str) -> ModalResult { @@ -268,17 +297,6 @@ pub fn year(input: &mut &str) -> ModalResult { .parse_next(input) } -fn day(input: &mut &str) -> ModalResult { - s(dec_uint) - .try_map(|x| { - (1..=31) - .contains(&x) - .then_some(x) - .ok_or(ParseDateTimeError::InvalidInput) - }) - .parse_next(input) -} - /// Parse the name of a month (case-insensitive) fn literal_month(input: &mut &str) -> ModalResult { s(alpha1) @@ -462,6 +480,110 @@ mod tests { } } + #[test] + fn literal1() { + let reference = Date { + year: Some(2022), + month: 11, + day: 14, + }; + + for mut s in [ + "14 november 2022", + "14 nov 2022", + "14-nov-2022", + "14-nov2022", + "14nov2022", + "14nov 2022", + ] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + let reference = Date { + year: None, + month: 11, + day: 14, + }; + + for mut s in ["14 november", "14 nov", "14-nov", "14nov"] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + let reference = 
Date { + year: None, + month: 11, + day: 14, + }; + + // Year must be followed by a space or end of input. + let mut s = "14 nov 2022a"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + assert_eq!(s, " 2022a"); + + let mut s = "14 nov-2022a"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + assert_eq!(s, "-2022a"); + } + + #[test] + fn literal2() { + let reference = Date { + year: Some(2022), + month: 11, + day: 14, + }; + + for mut s in [ + "november 14 2022", + "november 14, 2022", + "november 14 , 2022", + "nov 14 2022", + "nov14 2022", + "nov14, 2022", + ] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + let reference = Date { + year: None, + month: 11, + day: 14, + }; + + for mut s in ["november 14", "nov 14", "nov14"] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + let reference = Date { + year: None, + month: 11, + day: 14, + }; + + // There must be some space between the comma and the year. + let mut s = "november 14,2022"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + assert_eq!(s, ",2022"); + + // Year must be followed by a space or end of input. 
+ let mut s = "november 14 2022a"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + assert_eq!(s, " 2022a"); + + let mut s = "november 14, 2022a"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + assert_eq!(s, ", 2022a"); + } + #[test] fn with_year() { let reference = Date { diff --git a/src/items/mod.rs b/src/items/mod.rs index c59a832..41e2f7d 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -97,7 +97,7 @@ pub(crate) fn at_local( /// item = datetime | date | time | relative | weekday | timezone | year ; /// /// datetime = date , [ "T" | "t" | whitespace ] , iso_time ; -/// date = iso_date | us_date ; +/// date = iso_date | us_date | literal1_date | literal2_date ; /// /// iso_date = year , [ iso_date_delim ] , month , [ iso_date_delim ] , day ; /// iso_date_delim = [ { whitespace } ] , "-" , [ { whitespace } ] ; @@ -105,9 +105,28 @@ pub(crate) fn at_local( /// us_date = month , [ us_date_delim ] , day , [ [ us_date_delim ] , year ]; /// us_date_delim = [ { whitespace } ] , "/" , [ { whitespace } ] ; /// +/// literal1_date = day , [ literal1_date_delim ] , literal_month , [ [ literal1_date_delim ] , year ] ; +/// literal1_date_delim = { whitespace } | [ { whitespace } ] , "-" , [ { whitespace } ] ; +/// +/// literal2_date = literal_month , [ { whitespace } ] , day , [ [ literal2_date_delim ] , year ] ; +/// literal2_date_delim = { whitespace } | [ { whitespace } ] , "," , whitespace , [ { whitespace } ] ; +/// /// year = dec_int ; /// month = dec_int ; /// day = dec_int ; +/// +/// literal_month = "january" | "jan" +/// | "february" | "feb" +/// | "march" | "mar" +/// | "april" | "apr" +/// | "may" +/// | "june" | "jun" +/// | "july" | "jul" +/// | "august" | "aug" +/// | "september" | "sept" | "sep" +/// | "october" | "oct" +/// | "november" | "nov" +/// | "december" | "dec" ; /// ``` pub(crate) fn parse(input: &mut &str) -> ModalResult { trace("parse", 
alt((parse_timestamp, parse_items))).parse_next(input)