From 582d2950185f34dd2446739d07954e5220a70abd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 11:15:48 +0100
Subject: [PATCH 01/47] a nicer, mode extensible version of
 `build_format_from_order()` (less hardcoding)

---
 r/R/dplyr-datetime-helpers.R | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index bc1a13075dc..8b4b62dea02 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -201,19 +201,15 @@ build_formats <- function(orders) {
 }
 
 build_format_from_order <- function(order) {
-  year_chars <- c("%y", "%Y")
-  month_chars <- c("%m", "%B", "%b")
-  day_chars <- "%d"
-
-  outcome <- switch(
-    order,
-    "ymd" = expand.grid(year_chars, month_chars, day_chars),
-    "ydm" = expand.grid(year_chars, day_chars, month_chars),
-    "mdy" = expand.grid(month_chars, day_chars, year_chars),
-    "myd" = expand.grid(month_chars, year_chars, day_chars),
-    "dmy" = expand.grid(day_chars, month_chars, year_chars),
-    "dym" = expand.grid(day_chars, year_chars, month_chars)
+  char_list <- list(
+    "y" = c("%y", "%Y"),
+    "m" = c("%m", "%B", "%b"),
+    "d" = "%d"
   )
-  outcome$format <- paste(outcome$Var1, outcome$Var2, outcome$Var3, sep = "-")
-  outcome$format
+
+  split_order <- strsplit(order, split = "")[[1]]
+
+  outcome <- expand.grid(char_list[split_order])
+  format <- do.call(paste, c(outcome, sep = "-"))
+  format
 }

From c636bcb151afdf7521aea6ba99d17d112318feb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 12:09:12 +0100
Subject: [PATCH 02/47] extend the supported orders + unit tests

---
 r/R/dplyr-datetime-helpers.R                 | 12 ++++++++++--
 r/tests/testthat/test-dplyr-funcs-datetime.R | 14 --------------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 8b4b62dea02..41a0fd31a3a 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -179,7 +179,11 @@ build_formats <- function(orders) {
     orders <- unique(c(orders1, orders2))
   }
 
-  supported_orders <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym")
+  supported_orders <- c(
+    "ymd", "ydm", "mdy", "myd", "dmy", "dym",
+    "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", "mdy_HMS",
+    "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H")
+
   unsupported_passed_orders <- setdiff(orders, supported_orders)
   supported_passed_orders <- intersect(orders, supported_orders)
 
@@ -204,9 +208,13 @@ build_format_from_order <- function(order) {
   char_list <- list(
     "y" = c("%y", "%Y"),
     "m" = c("%m", "%B", "%b"),
-    "d" = "%d"
+    "d" = "%d",
+    "H" = "%H",
+    "M" = "%M",
+    "S" = "%S"
   )
 
+  order <- gsub("_", "", order)
   split_order <- strsplit(order, split = "")[[1]]
 
   outcome <- expand.grid(char_list[split_order])
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index bcea41b0521..8a864cd5177 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1744,20 +1744,6 @@ test_that("parse_date_time() works with a mix of formats and orders", {
   )
 })
 
-test_that("parse_date_time() doesn't work with hour, minutes, and second components", {
-  test_dates_times <- tibble(
-    date_times = c("09-01-17 12:34:56", NA)
-  )
-
-  expect_warning(
-    test_dates_times %>%
-      arrow_table() %>%
-      mutate(parsed_date_ymd = parse_date_time(date_times, orders = "ymd_HMS")) %>%
-      collect(),
-    '"ymd_HMS" `orders` not supported in Arrow'
-  )
-})
-
 test_that("year, month, day date/time parsers", {
   test_df <- tibble::tibble(
     ymd_string = c("2022-05-11", "2022/05/12", "22.05-13"),

From c38a993ebd8209d24a07fbd537c85b528d8ddb2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 14:16:24 +0100
Subject: [PATCH 03/47] add support for `truncated` and `quiet` arguments

---
 r/R/dplyr-datetime-helpers.R |  8 +++++---
 r/R/dplyr-funcs-datetime.R   | 12 +++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 41a0fd31a3a..c8b9b7b6e97 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -179,10 +179,12 @@ build_formats <- function(orders) {
     orders <- unique(c(orders1, orders2))
   }
 
-  supported_orders <- c(
-    "ymd", "ydm", "mdy", "myd", "dmy", "dym",
+  ymd_orders <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym")
+  ymd_hms_orders <- c(
     "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", "mdy_HMS",
-    "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H")
+    "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H"
+  )
+  supported_orders <- c(ymd_orders, ymd_hms_orders)
 
   unsupported_passed_orders <- setdiff(orders, supported_orders)
   supported_passed_orders <- intersect(orders, supported_orders)
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index bc0ddc4eed1..b12fdfac685 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -520,7 +520,17 @@ register_bindings_duration_helpers <- function() {
 register_bindings_datetime_parsers <- function() {
   register_binding("parse_date_time", function(x,
                                                orders,
-                                               tz = "UTC") {
+                                               tz = "UTC",
+                                               truncated = 0,
+                                               quiet = TRUE) {
+    if (!quiet) {
+      arrow_not_supported("`quiet = FALSE`")
+    }
+
+    if (truncated != 0) {
+      # build several orders for truncated formats
+      orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x))
+    }
 
     # each order is translated into possible formats
     formats <- build_formats(orders)

From 5b2445a7e46c3cc80f7d699e7fca7dbc30c36945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 14:16:50 +0100
Subject: [PATCH 04/47] more unit tests

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 78 ++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 8a864cd5177..492041e05bd 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1876,6 +1876,10 @@ test_that("lubridate's fast_strptime", {
     )
   )
 
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
+  skip_if_not_available("re2")
+
   compare_dplyr_binding(
     .input %>%
       mutate(
@@ -1950,4 +1954,78 @@ test_that("lubridate's fast_strptime", {
       ) %>%
       collect()
   )
+
+  test_dates_times2 <- tibble(
+    ymd_hms_string = c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", NA),
+    ymd_hm_string = c("67-Jan-09 12:34", "1970-June-22 20:13", NA),
+    ymd_h_string = c("67-Jan-09 12", "1970-June-22 20", NA),
+    dmy_hms_string = c("09-Jan-67 12:34:56", "22-June-1970 20:13:59", NA),
+    dmy_hm_string = c("09-Jan-67 12:34", "22-June-1970 20:13", NA),
+    dmy_h_string = c("09-Jan-67 12", "22-June-1970 20", NA),
+    mdy_hms_string = c("Jan-09-67 12:34:56", "June-22-1970 20:13:59", NA),
+    mdy_hm_string = c("Jan-09-67 12:34", "June-22-1970 20:13", NA),
+    mdy_h_string = c("Jan-09-67 12", "June-22-1970 20", NA),
+    ydm_hms_string = c("67-09-Jan 12:34:56", "1970-22-June 20:13:59", NA),
+    ydm_hm_string = c("67-09-Jan 12:34", "1970-22-June 20:13", NA),
+    ydm_h_string = c("67-09-Jan 12", "1970-22-June 20", NA)
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = parse_date_time(ymd_hms_string, orders = "ymd_HMS"),
+        ymd_hm_dttm  = parse_date_time(ymd_hm_string, orders = "ymd_HM"),
+        ymd_h_dttm   = parse_date_time(ymd_h_string, orders = "ymd_H"),
+        dmy_hms_dttm = parse_date_time(dmy_hms_string, orders = "dmy_HMS"),
+        dmy_hm_dttm  = parse_date_time(dmy_hm_string, orders = "dmy_HM"),
+        dmy_h_dttm   = parse_date_time(dmy_h_string, orders = "dmy_H"),
+        mdy_hms_dttm = parse_date_time(mdy_hms_string, orders = "mdy_HMS"),
+        mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdy_HM"),
+        mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
+        ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
+        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
+        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H"),
+        .keep = "used"
+      ) %>%
+      collect(),
+    test_dates_times2
+  )
+
+  # test truncated formats
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        dttm =
+          parse_date_time(
+            truncated_ymd_string,
+            orders = "ymd_HMS",
+            truncated = 3
+          )
+      ) %>%
+      collect(),
+    tibble(
+      truncated_ymd_string =
+        c(
+          "2022-05-19 13:46:51",
+          "2022-05-18 13:46",
+          "2022-05-17 13",
+          "2022-05-16"
+      )
+    )
+  )
+
+  # we need expect_warning twice as both the arrow pipeline (because quiet =
+  # FALSE is not supported) and the fallback dplyr/lubridate one throw
+  # warnings (the lubridate one because quiet is FALSE)
+  expect_warning(
+    expect_warning(
+      tibble(x = c("2022-05-19 13:46:51")) %>%
+        arrow_table() %>%
+        mutate(
+          x_dttm = parse_date_time(x, orders = "dmy_HMS", quiet = FALSE)
+        ) %>%
+        collect(),
+      "`quiet = FALSE` not supported in Arrow"
+    )
+  )
 })

From c5e275225963d700aefcb5864b0d3d5a191bf2fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 14:31:09 +0100
Subject: [PATCH 05/47] all tests back in

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 75 +++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 492041e05bd..b28df7141c0 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1954,6 +1954,79 @@ test_that("lubridate's fast_strptime", {
       ) %>%
       collect()
   )
+})
+
+test_that("parse_date_time with hours, minutes and seconds components", {
+  test_dates_times <- tibble(
+    ymd_hms_string = c("67-01-09 12:34:56", "1970-05-22 20:13:59", NA),
+    ymd_hm_string = c("67-01-09 12:34", "1970-05-22 20:13", NA),
+    ymd_h_string = c("67-01-09 12", "1970-05-22 20", NA),
+    dmy_hms_string = c("09-01-67 12:34:56", "22-05-1970 20:13:59", NA),
+    dmy_hm_string = c("09-01-67 12:34", "22-05-1970 20:13", NA),
+    dmy_h_string = c("09-01-67 12", "22-05-1970 20", NA),
+    mdy_hms_string = c("01-09-67 12:34:56", "05-22-1970 20:13:59", NA),
+    mdy_hm_string = c("01-09-67 12:34", "05-22-1970 20:13", NA),
+    mdy_h_string = c("01-09-67 12", "05-22-1970 20", NA),
+    ydm_hms_string = c("67-09-01 12:34:56", "1970-22-05 20:13:59", NA),
+    ydm_hm_string = c("67-09-01 12:34", "1970-22-05 20:13", NA),
+    ydm_h_string = c("67-09-01 12", "1970-22-05 20", NA)
+  )
+
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
+  skip_if_not_available("re2")
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = parse_date_time(ymd_hms_string, orders = "ymd_HMS"),
+        ymd_hm_dttm  = parse_date_time(ymd_hm_string, orders = "ymd_HM"),
+        ymd_h_dttm   = parse_date_time(ymd_h_string, orders = "ymd_H"),
+        dmy_hms_dttm = parse_date_time(dmy_hms_string, orders = "dmy_HMS"),
+        dmy_hm_dttm  = parse_date_time(dmy_hm_string, orders = "dmy_HM"),
+        dmy_h_dttm   = parse_date_time(dmy_h_string, orders = "dmy_H"),
+        mdy_hms_dttm = parse_date_time(mdy_hms_string, orders = "mdy_HMS"),
+        mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdy_HM"),
+        mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
+        ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
+        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
+        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H")
+      ) %>%
+      collect(),
+    test_dates_times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm =
+          parse_date_time(ymd_hms_string, orders = "ymd_HMS", tz = "Pacific/Marquesas"),
+        ymd_hm_dttm =
+          parse_date_time(ymd_hm_string, orders = "ymd_HM", tz = "Pacific/Marquesas"),
+        ymd_h_dttm =
+          parse_date_time(ymd_h_string, orders = "ymd_H", tz = "Pacific/Marquesas"),
+        dmy_hms_dttm =
+          parse_date_time(dmy_hms_string, orders = "dmy_HMS", tz = "Pacific/Marquesas"),
+        dmy_hm_dttm =
+          parse_date_time(dmy_hm_string, orders = "dmy_HM", tz = "Pacific/Marquesas"),
+        dmy_h_dttm =
+          parse_date_time(dmy_h_string, orders = "dmy_H", tz = "Pacific/Marquesas"),
+        mdy_hms_dttm =
+          parse_date_time(mdy_hms_string, orders = "mdy_HMS", tz = "Pacific/Marquesas"),
+        mdy_hm_dttm =
+          parse_date_time(mdy_hm_string, orders = "mdy_HM", tz = "Pacific/Marquesas"),
+        mdy_h_dttm =
+          parse_date_time(mdy_h_string, orders = "mdy_H", tz = "Pacific/Marquesas"),
+        ydm_hms_dttm =
+          parse_date_time(ydm_hms_string, orders = "ydm_HMS", tz = "Pacific/Marquesas"),
+        ydm_hm_dttm =
+          parse_date_time(ydm_hm_string, orders = "ydm_HM", tz = "Pacific/Marquesas"),
+        ydm_h_dttm =
+          parse_date_time(ydm_h_string, orders = "ydm_H", tz = "Pacific/Marquesas")
+      ) %>%
+      collect(),
+    test_dates_times
+  )
 
   test_dates_times2 <- tibble(
     ymd_hms_string = c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", NA),
@@ -2010,7 +2083,7 @@ test_that("lubridate's fast_strptime", {
           "2022-05-18 13:46",
           "2022-05-17 13",
           "2022-05-16"
-      )
+        )
     )
   )
 

From fa47c9c4dcfec57a1bc922838b95fa6f85722be1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 14:47:07 +0100
Subject: [PATCH 06/47] support `ymd HMS` and `ymdHMS` orders

---
 r/R/dplyr-datetime-helpers.R | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index c8b9b7b6e97..e59cd025f00 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -156,7 +156,7 @@ build_formats <- function(orders) {
   # only keep the letters and the underscore as separator -> allow the users to
   # pass strptime-like formats (with "%"). Processing is needed (instead of passing
   # formats as-is) due to the processing of the character vector in parse_date_time()
-  orders <- gsub("[^A-Za-z_]", "", orders)
+  orders <- gsub("[^A-Za-z]", "", orders)
   orders <- gsub("Y", "y", orders)
 
   # we separate "ym', "my", and "yq" from the rest of the `orders` vector and
@@ -184,7 +184,13 @@ build_formats <- function(orders) {
     "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", "mdy_HMS",
     "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H"
   )
-  supported_orders <- c(ymd_orders, ymd_hms_orders)
+
+  supported_orders <- c(
+    ymd_orders,
+    ymd_hms_orders,
+    gsub("_", " ", ymd_hms_orders), # allow "_", " " and "" as separators
+    gsub("_", "", ymd_hms_orders)
+  )
 
   unsupported_passed_orders <- setdiff(orders, supported_orders)
   supported_passed_orders <- intersect(orders, supported_orders)
@@ -216,7 +222,6 @@ build_format_from_order <- function(order) {
     "S" = "%S"
   )
 
-  order <- gsub("_", "", order)
   split_order <- strsplit(order, split = "")[[1]]
 
   outcome <- expand.grid(char_list[split_order])

From 8ea9845e5b62db5199d7b9a0b6e66e9af3f9c3e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 14:48:05 +0100
Subject: [PATCH 07/47] test `ydmHM` and `ydmH` orders

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index b28df7141c0..bb968f17e17 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1989,8 +1989,8 @@ test_that("parse_date_time with hours, minutes and seconds components", {
         mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdy_HM"),
         mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
         ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
-        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
-        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H")
+        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydmHM"),
+        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydmH")
       ) %>%
       collect(),
     test_dates_times

From 5e44e06d17025c39283abf15fca48e2c71a77e4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 15:37:23 +0100
Subject: [PATCH 08/47] reorganised tests so we can skip locale-dependent ones
 on windows

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 76 ++++++++++----------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index bb968f17e17..e84cb9437a6 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2028,42 +2028,6 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     test_dates_times
   )
 
-  test_dates_times2 <- tibble(
-    ymd_hms_string = c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", NA),
-    ymd_hm_string = c("67-Jan-09 12:34", "1970-June-22 20:13", NA),
-    ymd_h_string = c("67-Jan-09 12", "1970-June-22 20", NA),
-    dmy_hms_string = c("09-Jan-67 12:34:56", "22-June-1970 20:13:59", NA),
-    dmy_hm_string = c("09-Jan-67 12:34", "22-June-1970 20:13", NA),
-    dmy_h_string = c("09-Jan-67 12", "22-June-1970 20", NA),
-    mdy_hms_string = c("Jan-09-67 12:34:56", "June-22-1970 20:13:59", NA),
-    mdy_hm_string = c("Jan-09-67 12:34", "June-22-1970 20:13", NA),
-    mdy_h_string = c("Jan-09-67 12", "June-22-1970 20", NA),
-    ydm_hms_string = c("67-09-Jan 12:34:56", "1970-22-June 20:13:59", NA),
-    ydm_hm_string = c("67-09-Jan 12:34", "1970-22-June 20:13", NA),
-    ydm_h_string = c("67-09-Jan 12", "1970-22-June 20", NA)
-  )
-
-  compare_dplyr_binding(
-    .input %>%
-      mutate(
-        ymd_hms_dttm = parse_date_time(ymd_hms_string, orders = "ymd_HMS"),
-        ymd_hm_dttm  = parse_date_time(ymd_hm_string, orders = "ymd_HM"),
-        ymd_h_dttm   = parse_date_time(ymd_h_string, orders = "ymd_H"),
-        dmy_hms_dttm = parse_date_time(dmy_hms_string, orders = "dmy_HMS"),
-        dmy_hm_dttm  = parse_date_time(dmy_hm_string, orders = "dmy_HM"),
-        dmy_h_dttm   = parse_date_time(dmy_h_string, orders = "dmy_H"),
-        mdy_hms_dttm = parse_date_time(mdy_hms_string, orders = "mdy_HMS"),
-        mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdy_HM"),
-        mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
-        ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
-        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
-        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H"),
-        .keep = "used"
-      ) %>%
-      collect(),
-    test_dates_times2
-  )
-
   # test truncated formats
   compare_dplyr_binding(
     .input %>%
@@ -2101,4 +2065,44 @@ test_that("parse_date_time with hours, minutes and seconds components", {
       "`quiet = FALSE` not supported in Arrow"
     )
   )
+
+  test_dates_times2 <- tibble(
+    ymd_hms_string = c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", NA),
+    ymd_hm_string = c("67-Jan-09 12:34", "1970-June-22 20:13", NA),
+    ymd_h_string = c("67-Jan-09 12", "1970-June-22 20", NA),
+    dmy_hms_string = c("09-Jan-67 12:34:56", "22-June-1970 20:13:59", NA),
+    dmy_hm_string = c("09-Jan-67 12:34", "22-June-1970 20:13", NA),
+    dmy_h_string = c("09-Jan-67 12", "22-June-1970 20", NA),
+    mdy_hms_string = c("Jan-09-67 12:34:56", "June-22-1970 20:13:59", NA),
+    mdy_hm_string = c("Jan-09-67 12:34", "June-22-1970 20:13", NA),
+    mdy_h_string = c("Jan-09-67 12", "June-22-1970 20", NA),
+    ydm_hms_string = c("67-09-Jan 12:34:56", "1970-22-June 20:13:59", NA),
+    ydm_hm_string = c("67-09-Jan 12:34", "1970-22-June 20:13", NA),
+    ydm_h_string = c("67-09-Jan 12", "1970-22-June 20", NA)
+  )
+
+  # locale (affecting "%b% and "%B" formats) does not work properly on Windows
+  # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16443 is done
+  skip_on_os("windows")
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = parse_date_time(ymd_hms_string, orders = "ymd_HMS"),
+        ymd_hm_dttm  = parse_date_time(ymd_hm_string, orders = "ymd_HM"),
+        ymd_h_dttm   = parse_date_time(ymd_h_string, orders = "ymd_H"),
+        dmy_hms_dttm = parse_date_time(dmy_hms_string, orders = "dmy_HMS"),
+        dmy_hm_dttm  = parse_date_time(dmy_hm_string, orders = "dmy_HM"),
+        dmy_h_dttm   = parse_date_time(dmy_h_string, orders = "dmy_H"),
+        mdy_hms_dttm = parse_date_time(mdy_hms_string, orders = "mdy_HMS"),
+        mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdy_HM"),
+        mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
+        ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
+        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
+        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H"),
+        .keep = "used"
+      ) %>%
+      collect(),
+    test_dates_times2
+  )
 })

From 93346ee5eb33d25c11b451728f51bf0c5c96b794 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 19 May 2022 19:15:47 +0100
Subject: [PATCH 09/47] removed commented code referenced #13174

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index e84cb9437a6..a41c78a06c1 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1837,9 +1837,7 @@ test_that("lubridate's fast_strptime", {
       collect(),
     tibble(
       x = c("2018-10-07 19:04:05", "2022-05-17 21:23:45", NA)
-    )#,
-    # arrow does not preserve the `tzone` attribute
-    # test ignore_attr = TRUE
+    )
   )
 
   # R object
@@ -1856,8 +1854,7 @@ test_that("lubridate's fast_strptime", {
       collect(),
     tibble(
       x = c("2018-10-07 19:04:05", NA)
-    )#,
-    # test ignore_attr = TRUE
+    )
   )
 
   compare_dplyr_binding(
@@ -1913,9 +1910,7 @@ test_that("lubridate's fast_strptime", {
     tibble(
       x =
         c("68-10-07 19:04:05", "69-10-07 19:04:05", NA)
-    )#,
-    # arrow does not preserve the `tzone` attribute
-    # test ignore_attr = TRUE
+    )
   )
 
   # the arrow binding errors for a value different from 68L for `cutoff_2000`

From 3b213f98985b1cefee8ae1d8ef5fe92e2a5fea46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Fri, 20 May 2022 09:40:26 +0100
Subject: [PATCH 10/47] added support (and unit tests) for unseparated strings

---
 r/R/dplyr-datetime-helpers.R                 |   5 +-
 r/R/dplyr-funcs-datetime.R                   |  28 +++--
 r/tests/testthat/test-dplyr-funcs-datetime.R | 101 ++++++++++++-------
 3 files changed, 87 insertions(+), 47 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index e59cd025f00..fd53e79d50c 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -225,6 +225,7 @@ build_format_from_order <- function(order) {
   split_order <- strsplit(order, split = "")[[1]]
 
   outcome <- expand.grid(char_list[split_order])
-  format <- do.call(paste, c(outcome, sep = "-"))
-  format
+  formats_with_sep <- do.call(paste, c(outcome, sep = "-"))
+  formats_without_sep <- do.call(paste, c(outcome, sep = ""))
+  c(formats_with_sep, formats_without_sep)
 }
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index b12fdfac685..9f279add2b1 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -546,7 +546,13 @@ register_bindings_datetime_parsers <- function() {
     # for `ym` and `my` orders we add a day ("01")
     augmented_x <- NULL
     if (any(orders %in% c("ym", "my"))) {
-      augmented_x <- call_binding("paste0", x, "-01")
+      # add day as "-01" if there is a "-" separator and as "01" if not
+      augmented_x <- call_binding(
+        "if_else",
+        call_binding("grepl", "-", x),
+        call_binding("paste0", x, "-01"),
+        call_binding("paste0", x, "01")
+      )
     }
 
     # for `yq` we need to transform the quarter into the start month (lubridate
@@ -622,11 +628,21 @@ register_bindings_datetime_parsers <- function() {
     }
 
     # combine all attempts expressions in prep for coalesce
-    parse_attempt_expressions <- c(
-      parse_attempt_expressions,
-      parse_attempt_exp_augmented_x,
-      parse_attempt_exp_augmented_x2
-    )
+    # if the users passes only a short order (`ym`, `my` or `yq`) then only use
+    # the corresponding augmented_x
+    if (all(orders == "ym") || all(orders == "my")) {
+      parse_attempt_expressions <- parse_attempt_exp_augmented_x
+    } else if (all(orders == "yq")) {
+      parse_attempt_expressions <- parse_attempt_exp_augmented_x2
+    } else {
+      parse_attempt_expressions <- c(
+        # if we have an augmented x give preference to the corresponding
+        # parsing attempts
+        parse_attempt_exp_augmented_x,
+        parse_attempt_exp_augmented_x2,
+        parse_attempt_expressions
+      )
+    }
 
     coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions)
 
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index a41c78a06c1..b1b5c589c13 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1678,22 +1678,17 @@ test_that("parse_date_time() works with year, month, and date components", {
       string_ymd = c(
         "2021-09-1", "2021/09///2", "2021.09.03", "2021,09,4", "2021:09::5",
         "2021 09   6", "21-09-07", "21/09/08", "21.09.9", "21,09,10", "21:09:11",
-        # not yet working for strings with no separators, like "20210917", "210918" or "2021Sep19
-        # no separators and %b or %B are even more complicated (and they work in
-        # lubridate). not to mention locale
-        NA
+        "20210912", "210913", NA
       ),
       string_dmy = c(
         "1-09-2021", "2/09//2021", "03.09.2021", "04,09,2021", "5:::09:2021",
         "6  09  2021", "07-09-21", "08/09/21", "9.09.21", "10,09,21", "11:09:21",
-        # not yet working for strings with no separators, like "10092021", "100921",
-        NA
+        "12092021", "130921", NA
       ),
       string_mdy = c(
         "09-01-2021", "09/2/2021", "09.3.2021", "09,04,2021", "09:05:2021",
         "09 6 2021", "09-7-21", "09/08/21", "09.9.21", "09,10,21", "09:11:21",
-        # not yet working for strings with no separators, like "09102021", "091021",
-        NA
+        "09122021", "091321", NA
       )
     )
   )
@@ -1711,13 +1706,16 @@ test_that("parse_date_time() works with year, month, and date components", {
       collect(),
     tibble::tibble(
       string_ymd = c(
-        "2021 Sep 12", "2021 September 13", "21 Sep 14", "21 September 15", NA
+        "2021 Sep 12", "2021 September 13", "21 Sep 14", "21 September 15",
+        "2021Sep16", NA
       ),
       string_dmy = c(
-        "12 Sep 2021", "13 September 2021", "14 Sep 21", "15 September 21", NA
+        "12 Sep 2021", "13 September 2021", "14 Sep 21", "15 September 21",
+        "16Sep2021",NA
       ),
       string_mdy = c(
-        "Sep 12 2021", "September 13 2021", "Sep 14 21", "September 15 21", NA
+        "Sep 12 2021", "September 13 2021", "Sep 14 21", "September 15 21",
+        "Sep1621", NA
       )
     )
   )
@@ -1953,19 +1951,32 @@ test_that("lubridate's fast_strptime", {
 
 test_that("parse_date_time with hours, minutes and seconds components", {
   test_dates_times <- tibble(
-    ymd_hms_string = c("67-01-09 12:34:56", "1970-05-22 20:13:59", NA),
-    ymd_hm_string = c("67-01-09 12:34", "1970-05-22 20:13", NA),
-    ymd_h_string = c("67-01-09 12", "1970-05-22 20", NA),
-    dmy_hms_string = c("09-01-67 12:34:56", "22-05-1970 20:13:59", NA),
-    dmy_hm_string = c("09-01-67 12:34", "22-05-1970 20:13", NA),
-    dmy_h_string = c("09-01-67 12", "22-05-1970 20", NA),
-    mdy_hms_string = c("01-09-67 12:34:56", "05-22-1970 20:13:59", NA),
-    mdy_hm_string = c("01-09-67 12:34", "05-22-1970 20:13", NA),
-    mdy_h_string = c("01-09-67 12", "05-22-1970 20", NA),
-    ydm_hms_string = c("67-09-01 12:34:56", "1970-22-05 20:13:59", NA),
-    ydm_hm_string = c("67-09-01 12:34", "1970-22-05 20:13", NA),
-    ydm_h_string = c("67-09-01 12", "1970-22-05 20", NA)
-  )
+    ymd_hms_string =
+      c("67-01-09 12:34:56", "1970-05-22 20:13:59", "870822201359", NA),
+    ymd_hm_string =
+      c("67-01-09 12:34", "1970-05-22 20:13", "8708222013", NA),
+    ymd_h_string =
+      c("67-01-09 12", "1970-05-22 20", "87082220", NA),
+    dmy_hms_string =
+      c("09-01-67 12:34:56", "22-05-1970 20:13:59", "220887201359", NA),
+    dmy_hm_string =
+      c("09-01-67 12:34", "22-05-1970 20:13",  "2208872013", NA),
+    dmy_h_string =
+      c("09-01-67 12", "22-05-1970 20", "22088720", NA),
+    mdy_hms_string =
+      c("01-09-67 12:34:56", "05-22-1970 20:13:59", "082287201359", NA),
+    mdy_hm_string =
+      c("01-09-67 12:34", "05-22-1970 20:13", "0822872013", NA),
+    mdy_h_string =
+      c("01-09-67 12", "05-22-1970 20", "08228720", NA),
+    ydm_hms_string =
+      c("67-09-01 12:34:56", "1970-22-05 20:13:59", "872208201359", NA),
+    ydm_hm_string =
+      c("67-09-01 12:34", "1970-22-05 20:13", "8722082013", NA),
+    ydm_h_string =
+      c("67-09-01 12", "1970-22-05 20", "87220820", NA)
+  )
+  # the unseparated strings are versions of "1987-08-22 20:13:59" (with %y)
 
   # these functions' internals use some string processing which requires the
   # RE2 library (not available on Windows with R 3.6)
@@ -2062,19 +2073,32 @@ test_that("parse_date_time with hours, minutes and seconds components", {
   )
 
   test_dates_times2 <- tibble(
-    ymd_hms_string = c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", NA),
-    ymd_hm_string = c("67-Jan-09 12:34", "1970-June-22 20:13", NA),
-    ymd_h_string = c("67-Jan-09 12", "1970-June-22 20", NA),
-    dmy_hms_string = c("09-Jan-67 12:34:56", "22-June-1970 20:13:59", NA),
-    dmy_hm_string = c("09-Jan-67 12:34", "22-June-1970 20:13", NA),
-    dmy_h_string = c("09-Jan-67 12", "22-June-1970 20", NA),
-    mdy_hms_string = c("Jan-09-67 12:34:56", "June-22-1970 20:13:59", NA),
-    mdy_hm_string = c("Jan-09-67 12:34", "June-22-1970 20:13", NA),
-    mdy_h_string = c("Jan-09-67 12", "June-22-1970 20", NA),
-    ydm_hms_string = c("67-09-Jan 12:34:56", "1970-22-June 20:13:59", NA),
-    ydm_hm_string = c("67-09-Jan 12:34", "1970-22-June 20:13", NA),
-    ydm_h_string = c("67-09-Jan 12", "1970-22-June 20", NA)
-  )
+    ymd_hms_string =
+      c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", "87Aug22201359", NA),
+    ymd_hm_string =
+      c("67-Jan-09 12:34", "1970-June-22 20:13", "87Aug222013", NA),
+    ymd_h_string =
+      c("67-Jan-09 12", "1970-June-22 20", "87Aug2220", NA),
+    dmy_hms_string =
+      c("09-Jan-67 12:34:56", "22-June-1970 20:13:59", "22Aug87201359", NA),
+    dmy_hm_string =
+      c("09-Jan-67 12:34", "22-June-1970 20:13", "22Aug872013", NA),
+    dmy_h_string =
+      c("09-Jan-67 12", "22-June-1970 20", "22Aug8720", NA),
+    mdy_hms_string =
+      c("Jan-09-67 12:34:56", "June-22-1970 20:13:59", "Aug2287201359", NA),
+    mdy_hm_string =
+      c("Jan-09-67 12:34", "June-22-1970 20:13", "Aug22872013", NA),
+    mdy_h_string =
+      c("Jan-09-67 12", "June-22-1970 20", "Aug228720", NA),
+    ydm_hms_string =
+      c("67-09-Jan 12:34:56", "1970-22-June 20:13:59", "8722Aug201359", NA),
+    ydm_hm_string =
+      c("67-09-Jan 12:34", "1970-22-June 20:13", "8722Aug2013", NA),
+    ydm_h_string =
+      c("67-09-Jan 12", "1970-22-June 20", "8722Aug20", NA)
+  )
+  # the unseparated strings are versions of "1987-08-22 20:13:59" (with %y)
 
   # locale (affecting "%b% and "%B" formats) does not work properly on Windows
   # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16443 is done
@@ -2094,8 +2118,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
         mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
         ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
         ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
-        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H"),
-        .keep = "used"
+        ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H")
       ) %>%
       collect(),
     test_dates_times2

From 715201d071c0d745a9cb0a43c3e058648599eb4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Fri, 20 May 2022 09:42:22 +0100
Subject: [PATCH 11/47] update NEWS

---
 r/NEWS.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 6d25aa2154f..d88be229640 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -20,8 +20,7 @@
 # arrow 8.0.0.9000
 
 * `lubridate::parse_date_time()` datetime parser:
-  * currently parses only `orders` with year, month, and day components. In a future release `orders` support for other datetime components (such as hours, minutes, seconds, etc) will be added.
-  * strings with no separators (e.g. `"20210917"`) could be ambiguous and are not yet supported.
+  * `orders` with year, month, day, hours, minutes, and seconds components are supported.
   * the `orders` argument in the Arrow binding works as follows: `orders` are transformed into `formats` which subsequently get applied in turn. There is no `select_formats` parameter and no inference takes place (like is the case in `lubridate::parse_date_time()`).
 
 # arrow 8.0.0

From 5353532b9b52efe16f6d9e902924116fbc3e37cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Fri, 20 May 2022 09:47:50 +0100
Subject: [PATCH 12/47] orders without `"_"`

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index b1b5c589c13..bd983cba91e 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2108,16 +2108,16 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     .input %>%
       mutate(
         ymd_hms_dttm = parse_date_time(ymd_hms_string, orders = "ymd_HMS"),
-        ymd_hm_dttm  = parse_date_time(ymd_hm_string, orders = "ymd_HM"),
+        ymd_hm_dttm  = parse_date_time(ymd_hm_string, orders = "ymdHM"),
         ymd_h_dttm   = parse_date_time(ymd_h_string, orders = "ymd_H"),
         dmy_hms_dttm = parse_date_time(dmy_hms_string, orders = "dmy_HMS"),
-        dmy_hm_dttm  = parse_date_time(dmy_hm_string, orders = "dmy_HM"),
+        dmy_hm_dttm  = parse_date_time(dmy_hm_string, orders = "dmyHM"),
         dmy_h_dttm   = parse_date_time(dmy_h_string, orders = "dmy_H"),
         mdy_hms_dttm = parse_date_time(mdy_hms_string, orders = "mdy_HMS"),
-        mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdy_HM"),
+        mdy_hm_dttm  = parse_date_time(mdy_hm_string, orders = "mdyHM"),
         mdy_h_dttm   = parse_date_time(mdy_h_string, orders = "mdy_H"),
         ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS"),
-        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydm_HM"),
+        ydm_hm_dttm  = parse_date_time(ydm_hm_string, orders = "ydmHM"),
         ydm_h_dttm   = parse_date_time(ydm_h_string, orders = "ydm_H")
       ) %>%
       collect(),

From 57c02d67a1a2b9e85dbab1c0a4e22a125b535d92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Fri, 20 May 2022 10:46:10 +0100
Subject: [PATCH 13/47] lint

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index bd983cba91e..ab564376d0c 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1711,7 +1711,7 @@ test_that("parse_date_time() works with year, month, and date components", {
       ),
       string_dmy = c(
         "12 Sep 2021", "13 September 2021", "14 Sep 21", "15 September 21",
-        "16Sep2021",NA
+        "16Sep2021", NA
       ),
       string_mdy = c(
         "Sep 12 2021", "September 13 2021", "Sep 14 21", "September 15 21",

From ecc62ba28e43955f96aaf557203b16d76afec761 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Fri, 20 May 2022 12:26:27 +0100
Subject: [PATCH 14/47] unit test for `exact = TRUE`

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 27 ++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index ab564376d0c..134ba57a259 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2124,3 +2124,30 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     test_dates_times2
   )
 })
+
+test_that("parse_date_time & `exact = TRUE`", {
+  test_df <- tibble(
+    x = c("2022-12-31 12:59:59", "2022-01-01 12:11", "2022-01-01 12", "2022-01-01", NA),
+    y = c("11/23/1998 07:00:00", "6/18/1952 0135", "2/25/1974 0523", "9/07/1985 01", NA)
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        parsed_x =
+          parse_date_time(
+            x,
+            c("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%d %H", "%Y-%m-%d"),
+            exact = TRUE
+          ),
+        parsed_y =
+          parse_date_time(
+            y,
+            c("%m/%d/%Y %I:%M:%S", "%m/%d/%Y %H%M", "%m/%d/%Y %H"),
+            exact = TRUE
+          )
+      ) %>%
+      collect(),
+    test_df
+  )
+})

From f4897c1a30fb37c965c6300e36e5e0f89184e90b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Fri, 20 May 2022 12:27:41 +0100
Subject: [PATCH 15/47] refactored `parse_date_time()` with several helper
 functions

---
 r/R/dplyr-datetime-helpers.R | 111 ++++++++++++++++++++++++++++++++
 r/R/dplyr-funcs-datetime.R   | 119 +++--------------------------------
 2 files changed, 119 insertions(+), 111 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index fd53e79d50c..ac7209c88d7 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -229,3 +229,114 @@ build_format_from_order <- function(order) {
   formats_without_sep <- do.call(paste, c(outcome, sep = ""))
   c(formats_with_sep, formats_without_sep)
 }
+
+process_data_for_parsing <- function(x,
+                                     orders) {
+
+  processed_x <- x$cast(string())
+
+  # make all separators (non-letters and non-numbers) into "-"
+  processed_x <- call_binding("gsub", "[^A-Za-z0-9]", "-", processed_x)
+  # collapse multiple separators into a single one
+  processed_x <- call_binding("gsub", "-{2,}", "-", processed_x)
+
+  # we need to transform `x` when orders are `ym`, `my`, and `yq`
+  # for `ym` and `my` orders we add a day ("01")
+  augmented_x_ym <- NULL
+  if (any(orders %in% c("ym", "my"))) {
+    # add day as "-01" if there is a "-" separator and as "01" if not
+    augmented_x_ym <- call_binding(
+      "if_else",
+      call_binding("grepl", "-", processed_x),
+      call_binding("paste0", processed_x, "-01"),
+      call_binding("paste0", processed_x, "01")
+    )
+  }
+
+  # for `yq` we need to transform the quarter into the start month (lubridate
+  # behaviour) and then add 01 to parse to the first day of the quarter
+  augmented_x_yq <- NULL
+  if (any(orders == "yq")) {
+    # extract everything that comes after the `-` separator, i.e. the quarter
+    # (e.g. 4 from 2022-4)
+    quarter_x <- call_binding("gsub", "^.*?-", "", processed_x)
+    # we should probably error if quarter is not in 1:4
+    # extract everything that comes before the `-`, i.e. the year (e.g. 2002
+    # in 2002-4)
+    year_x <- call_binding("gsub", "-.*$", "", processed_x)
+    quarter_x <- quarter_x$cast(int32())
+    month_x <- (quarter_x - 1) * 3 + 1
+    augmented_x_yq <- call_binding("paste0", year_x, "-", month_x, "-01")
+  }
+
+  list(
+    "augmented_x_ym" = augmented_x_ym,
+    "augmented_x_yq" = augmented_x_yq,
+    "processed_x" = processed_x
+  )
+}
+
+attempt_parsing <- function(x,
+                            orders,
+                            formats = NULL) {
+  if (is.null(formats)) {
+    # this is the situation in which orders were passed with `exact = TRUE`
+    # no data processing takes place
+    # we don't derive formats as the orders are assumed to be formats
+    parse_attempt_expressions <- build_strptime_exps(x, orders)
+    return(parse_attempt_expressions)
+  }
+
+  processed_data <- process_data_for_parsing(x, orders)
+
+  processed_x <- processed_data[["processed_x"]]
+  augmented_x_ym <- processed_data[["augmented_x_ym"]]
+  augmented_x_yq <- processed_data[["augmented_x_yq"]]
+
+  # build a list of expressions for each format
+  parse_attempt_exp_processed_x <- build_strptime_exps(processed_x, formats)
+
+  # build separate expression lists of parsing attempts for the orders that
+  # need an augmented `x`
+  # list for attempts when orders %in% c("ym", "my")
+  parse_attempt_exp_augmented_x_ym <- list()
+
+  if (!is.null(augmented_x_ym)) {
+    parse_attempt_exp_augmented_x_ym <- build_strptime_exps(augmented_x_ym, formats)
+  }
+
+  # list for attempts when orders %in% c("yq")
+  parse_attempt_exp_augmented_x_yq <- list()
+  if (!is.null(augmented_x_yq)) {
+    parse_attempt_exp_augmented_x_yq <- build_strptime_exps(augmented_x_yq, formats)
+  }
+
+  # combine all attempts expressions in prep for coalesce
+  # if the users passes only a short order (`ym`, `my` or `yq`) then only use
+  # the corresponding augmented_x
+  if (all(orders == "ym") || all(orders == "my")) {
+    parse_attempt_expressions <- parse_attempt_exp_augmented_x_ym
+  } else if (all(orders == "yq")) {
+    parse_attempt_expressions <- parse_attempt_exp_augmented_x_yq
+  } else {
+    parse_attempt_expressions <- c(
+      # if we have an augmented x give preference to the corresponding
+      # parsing attempts
+      parse_attempt_exp_augmented_x_ym,
+      parse_attempt_exp_augmented_x_yq,
+      parse_attempt_exp_processed_x
+    )
+  }
+  parse_attempt_expressions
+}
+
+build_strptime_exps <- function(x, formats) {
+  map(
+    formats,
+    ~ build_expr(
+      "strptime",
+      x,
+      options = list(format = .x, unit = 0L, error_is_null = TRUE)
+    )
+  )
+}
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 9f279add2b1..bd46527f2ca 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -522,7 +522,8 @@ register_bindings_datetime_parsers <- function() {
                                                orders,
                                                tz = "UTC",
                                                truncated = 0,
-                                               quiet = TRUE) {
+                                               quiet = TRUE,
+                                               exact = FALSE) {
     if (!quiet) {
       arrow_not_supported("`quiet = FALSE`")
     }
@@ -532,119 +533,15 @@ register_bindings_datetime_parsers <- function() {
       orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x))
     }
 
-    # each order is translated into possible formats
-    formats <- build_formats(orders)
-
-    x <- x$cast(string())
-
-    # make all separators (non-letters and non-numbers) into "-"
-    x <- call_binding("gsub", "[^A-Za-z0-9]", "-", x)
-    # collapse multiple separators into a single one
-    x <- call_binding("gsub", "-{2,}", "-", x)
-
-    # we need to transform `x` when orders are `ym`, `my`, and `yq`
-    # for `ym` and `my` orders we add a day ("01")
-    augmented_x <- NULL
-    if (any(orders %in% c("ym", "my"))) {
-      # add day as "-01" if there is a "-" separator and as "01" if not
-      augmented_x <- call_binding(
-        "if_else",
-        call_binding("grepl", "-", x),
-        call_binding("paste0", x, "-01"),
-        call_binding("paste0", x, "01")
-      )
-    }
-
-    # for `yq` we need to transform the quarter into the start month (lubridate
-    # behaviour) and then add 01 to parse to the first day of the quarter
-    augmented_x2 <- NULL
-    if (any(orders == "yq")) {
-      # extract everything that comes after the `-` separator, i.e. the quarter
-      # (e.g. 4 from 2022-4)
-      quarter_x <- call_binding("gsub", "^.*?-", "", x)
-      # we should probably error if quarter is not in 1:4
-      # extract everything that comes before the `-`, i.e. the year (e.g. 2002
-      # in 2002-4)
-      year_x <- call_binding("gsub", "-.*$", "", x)
-      quarter_x <- quarter_x$cast(int32())
-      month_x <- (quarter_x - 1) * 3 + 1
-      augmented_x2 <- call_binding("paste0", year_x, "-", month_x, "-01")
-    }
-
-    # TODO figure out how to parse strings that have no separators
-    # https://issues.apache.org/jira/browse/ARROW-16446
-    # we could insert separators at the "likely" positions, but it might be
-    # tricky given the possible combinations between dmy formats + locale
-
-    # build a list of expressions for each format
-    parse_attempt_expressions <- map(
-      formats,
-      ~ build_expr(
-        "strptime",
-        x,
-        options = list(
-          format = .x,
-          unit = 0L,
-          error_is_null = TRUE
-        )
-      )
-    )
-
-    # build separate expression lists of parsing attempts for the orders that
-    # need an augmented `x`
-    # list for attempts when orders %in% c("ym", "my")
-    parse_attempt_exp_augmented_x <- list()
-
-    if (!is.null(augmented_x)) {
-      parse_attempt_exp_augmented_x <- map(
-        formats,
-        ~ build_expr(
-          "strptime",
-          augmented_x,
-          options = list(
-            format = .x,
-            unit = 0L,
-            error_is_null = TRUE
-          )
-        )
-      )
-    }
-
-    # list for attempts when orders %in% c("yq")
-    parse_attempt_exp_augmented_x2 <- list()
-    if (!is.null(augmented_x2)) {
-      parse_attempt_exp_augmented_x2 <- map(
-        formats,
-        ~ build_expr(
-          "strptime",
-          augmented_x2,
-          options = list(
-            format = .x,
-            unit = 0L,
-            error_is_null = TRUE
-          )
-        )
-      )
-    }
-
-    # combine all attempts expressions in prep for coalesce
-    # if the users passes only a short order (`ym`, `my` or `yq`) then only use
-    # the corresponding augmented_x
-    if (all(orders == "ym") || all(orders == "my")) {
-      parse_attempt_expressions <- parse_attempt_exp_augmented_x
-    } else if (all(orders == "yq")) {
-      parse_attempt_expressions <- parse_attempt_exp_augmented_x2
+    if (exact == TRUE) {
+      parse_attempts <- attempt_parsing(x, orders = orders)
     } else {
-      parse_attempt_expressions <- c(
-        # if we have an augmented x give preference to the corresponding
-        # parsing attempts
-        parse_attempt_exp_augmented_x,
-        parse_attempt_exp_augmented_x2,
-        parse_attempt_expressions
-      )
+      # each order is translated into possible formats
+      formats <- build_formats(orders)
+      parse_attempts <- attempt_parsing(x, orders = orders, formats = formats)
     }
 
-    coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions)
+    coalesce_output <- build_expr("coalesce", args = parse_attempts)
 
     # we need this binding to be able to handle a NULL `tz`, which will then be
     # used by bindings such as `ymd` to return, based on whether tz is NULL or

From 81468f3291b461b2768beaf58020937f25236fba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Sat, 21 May 2022 11:31:43 +0100
Subject: [PATCH 16/47] move `build_formats` inside `attempt_parsing`, since it
 won't be used outside it

---
 r/R/dplyr-datetime-helpers.R | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index ac7209c88d7..a0908b30c9d 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -277,15 +277,10 @@ process_data_for_parsing <- function(x,
 }
 
 attempt_parsing <- function(x,
-                            orders,
-                            formats = NULL) {
-  if (is.null(formats)) {
-    # this is the situation in which orders were passed with `exact = TRUE`
-    # no data processing takes place
-    # we don't derive formats as the orders are assumed to be formats
-    parse_attempt_expressions <- build_strptime_exps(x, orders)
-    return(parse_attempt_expressions)
-  }
+                            orders) {
+
+  # translate orders into possible formats
+  formats <- build_formats(orders)
 
   processed_data <- process_data_for_parsing(x, orders)
 

From e9cd0e9aa121a3ede88c97cedca063696548a262 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Sat, 21 May 2022 11:32:21 +0100
Subject: [PATCH 17/47] when `exact = TRUE` go directly to
 `build_strptime_exps()`

---
 r/R/dplyr-funcs-datetime.R | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index bd46527f2ca..6b4a2a406f8 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -534,11 +534,10 @@ register_bindings_datetime_parsers <- function() {
     }
 
     if (exact == TRUE) {
-      parse_attempts <- attempt_parsing(x, orders = orders)
+      # no data processing takes place & we don't derive formats
+      parse_attempts <- build_strptime_exps(x, orders)
     } else {
-      # each order is translated into possible formats
-      formats <- build_formats(orders)
-      parse_attempts <- attempt_parsing(x, orders = orders, formats = formats)
+      parse_attempts <- attempt_parsing(x, orders = orders)
     }
 
     coalesce_output <- build_expr("coalesce", args = parse_attempts)

From a72659fb3a4a9b048e30aa897b9958a8689cc678 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Sat, 21 May 2022 11:39:31 +0100
Subject: [PATCH 18/47] assert warning message originating in the dplyr
 pipeline

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 134ba57a259..9572269aad7 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2069,7 +2069,8 @@ test_that("parse_date_time with hours, minutes and seconds components", {
         ) %>%
         collect(),
       "`quiet = FALSE` not supported in Arrow"
-    )
+    ),
+    "All formats failed to parse"
   )
 
   test_dates_times2 <- tibble(

From edb76ccab9730fcb58a5f30d28238a170c046c23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 23 May 2022 12:38:29 +0100
Subject: [PATCH 19/47] limit `truncated` to `nchar(orders) - 3` + unit test

---
 r/R/dplyr-funcs-datetime.R                   |  3 ++
 r/tests/testthat/test-dplyr-funcs-datetime.R | 37 +++++++++++++++-----
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 6b4a2a406f8..9dcc563fef6 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -529,6 +529,9 @@ register_bindings_datetime_parsers <- function() {
     }
 
     if (truncated != 0) {
+      if (truncated > (nchar(orders) - 3)) {
+        arrow_not_supported(paste0("a value for `truncated` > ", nchar(orders) - 3))
+      }
       # build several orders for truncated formats
       orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x))
     }
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 9572269aad7..52ba6f901f7 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2035,6 +2035,16 @@ test_that("parse_date_time with hours, minutes and seconds components", {
   )
 
   # test truncated formats
+  test_truncation_df <-  tibble(
+    truncated_ymd_string =
+      c(
+        "2022-05-19 13:46:51",
+        "2022-05-18 13:46",
+        "2022-05-17 13",
+        "2022-05-16"
+      )
+  )
+
   compare_dplyr_binding(
     .input %>%
       mutate(
@@ -2046,17 +2056,26 @@ test_that("parse_date_time with hours, minutes and seconds components", {
           )
       ) %>%
       collect(),
-    tibble(
-      truncated_ymd_string =
-        c(
-          "2022-05-19 13:46:51",
-          "2022-05-18 13:46",
-          "2022-05-17 13",
-          "2022-05-16"
-        )
-    )
+    test_truncation_df
+  )
+
+  # values for truncated greater than nchar(orders) - 3 not supported in Arrow
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        dttm =
+          parse_date_time(
+            truncated_ymd_string,
+            orders = "ymd_HMS",
+            truncated = 5
+          )
+      ) %>%
+      collect(),
+    test_truncation_df,
+    warning = "a value for `truncated` > 4 not supported in Arrow"
   )
 
+
   # we need expect_warning twice as both the arrow pipeline (because quiet =
   # FALSE is not supported) and the fallback dplyr/lubridate one throw
   # warnings (the lubridate one because quiet is FALSE)

From 1c39c7cf29aee7dca65aa3a57aaeb851b9ecd176 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 23 May 2022 13:12:21 +0100
Subject: [PATCH 20/47] support hours as `"%I"` + unit tests

---
 r/R/dplyr-datetime-helpers.R                 | 10 ++++++++--
 r/tests/testthat/test-dplyr-funcs-datetime.R | 11 +++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index a0908b30c9d..1698d4a296b 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -184,12 +184,16 @@ build_formats <- function(orders) {
     "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", "mdy_HMS",
     "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H"
   )
+  ymd_ims_orders <- gsub("H", "I", ymd_hms_orders)
 
   supported_orders <- c(
     ymd_orders,
     ymd_hms_orders,
     gsub("_", " ", ymd_hms_orders), # allow "_", " " and "" as separators
-    gsub("_", "", ymd_hms_orders)
+    gsub("_", "", ymd_hms_orders),
+    ymd_ims_orders,
+    gsub("_", " ", ymd_ims_orders), # allow "_", " " and "" as separators
+    gsub("_", "", ymd_ims_orders)
   )
 
   unsupported_passed_orders <- setdiff(orders, supported_orders)
@@ -219,7 +223,8 @@ build_format_from_order <- function(order) {
     "d" = "%d",
     "H" = "%H",
     "M" = "%M",
-    "S" = "%S"
+    "S" = "%S",
+    "I" = "%I"
   )
 
   split_order <- strsplit(order, split = "")[[1]]
@@ -242,6 +247,7 @@ process_data_for_parsing <- function(x,
 
   # we need to transform `x` when orders are `ym`, `my`, and `yq`
   # for `ym` and `my` orders we add a day ("01")
+  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16627
   augmented_x_ym <- NULL
   if (any(orders %in% c("ym", "my"))) {
     # add day as "-01" if there is a "-" separator and as "01" if not
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 52ba6f901f7..76ba7034d75 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2034,6 +2034,17 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     test_dates_times
   )
 
+  # test ymd_ims
+  compare_dplyr_binding(
+    .input %>%
+      mutate(ymd_ims_dttm = parse_date_time(ymd_ims_string, orders = "ymd_IMS")) %>%
+      collect(),
+    tibble(
+     ymd_ims_string =
+       c("67-01-09 12:34:56", "1970-05-22 11:13:59", "870822101359", NA)
+   )
+  )
+
   # test truncated formats
   test_truncation_df <-  tibble(
     truncated_ymd_string =

From ad12187a329bcb47b6cd0497464012223989c612 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 23 May 2022 14:07:31 +0100
Subject: [PATCH 21/47] simplified `attempt_parsing()`

---
 r/R/dplyr-datetime-helpers.R | 47 +++++++++---------------------------
 r/R/dplyr-funcs-datetime.R   |  2 +-
 2 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 1698d4a296b..29b0a7a423e 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -184,6 +184,7 @@ build_formats <- function(orders) {
     "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", "mdy_HMS",
     "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H"
   )
+  # support "%I" hour formats
   ymd_ims_orders <- gsub("H", "I", ymd_hms_orders)
 
   supported_orders <- c(
@@ -290,48 +291,22 @@ attempt_parsing <- function(x,
 
   processed_data <- process_data_for_parsing(x, orders)
 
-  processed_x <- processed_data[["processed_x"]]
-  augmented_x_ym <- processed_data[["augmented_x_ym"]]
-  augmented_x_yq <- processed_data[["augmented_x_yq"]]
+  parse_attempt_exprs_list <- map(processed_data, build_strptime_exprs, formats)
 
-  # build a list of expressions for each format
-  parse_attempt_exp_processed_x <- build_strptime_exps(processed_x, formats)
-
-  # build separate expression lists of parsing attempts for the orders that
-  # need an augmented `x`
-  # list for attempts when orders %in% c("ym", "my")
-  parse_attempt_exp_augmented_x_ym <- list()
-
-  if (!is.null(augmented_x_ym)) {
-    parse_attempt_exp_augmented_x_ym <- build_strptime_exps(augmented_x_ym, formats)
+  # if all orders are c("ym", "my", "yq") only attempt to parse the augmented_x
+  if (all(orders %in% c("ym", "my", "yq"))) {
+    parse_attempt_exprs_list$processed_x <- list()
   }
 
-  # list for attempts when orders %in% c("yq")
-  parse_attempt_exp_augmented_x_yq <- list()
-  if (!is.null(augmented_x_yq)) {
-    parse_attempt_exp_augmented_x_yq <- build_strptime_exps(augmented_x_yq, formats)
-  }
+  purrr::flatten(parse_attempt_exprs_list)
+}
 
-  # combine all attempts expressions in prep for coalesce
-  # if the users passes only a short order (`ym`, `my` or `yq`) then only use
-  # the corresponding augmented_x
-  if (all(orders == "ym") || all(orders == "my")) {
-    parse_attempt_expressions <- parse_attempt_exp_augmented_x_ym
-  } else if (all(orders == "yq")) {
-    parse_attempt_expressions <- parse_attempt_exp_augmented_x_yq
-  } else {
-    parse_attempt_expressions <- c(
-      # if we have an augmented x give preference to the corresponding
-      # parsing attempts
-      parse_attempt_exp_augmented_x_ym,
-      parse_attempt_exp_augmented_x_yq,
-      parse_attempt_exp_processed_x
-    )
+build_strptime_exprs <- function(x, formats) {
+  # returning an empty list helps when iterating with build_strptime_exprs
+  if (is.null(x)) {
+    return(list())
   }
-  parse_attempt_expressions
-}
 
-build_strptime_exps <- function(x, formats) {
   map(
     formats,
     ~ build_expr(
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 9dcc563fef6..a9d89e283e8 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -538,7 +538,7 @@ register_bindings_datetime_parsers <- function() {
 
     if (exact == TRUE) {
       # no data processing takes place & we don't derive formats
-      parse_attempts <- build_strptime_exps(x, orders)
+      parse_attempts <- build_strptime_exprs(x, orders)
     } else {
       parse_attempts <- attempt_parsing(x, orders = orders)
     }

From 384bc0a093acb488a68daf791139d69403b220b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 23 May 2022 14:40:51 +0100
Subject: [PATCH 22/47] minor

---
 r/R/dplyr-datetime-helpers.R                 | 5 ++---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 29b0a7a423e..abc8801d512 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -285,7 +285,6 @@ process_data_for_parsing <- function(x,
 
 attempt_parsing <- function(x,
                             orders) {
-
   # translate orders into possible formats
   formats <- build_formats(orders)
 
@@ -293,7 +292,7 @@ attempt_parsing <- function(x,
 
   parse_attempt_exprs_list <- map(processed_data, build_strptime_exprs, formats)
 
-  # if all orders are c("ym", "my", "yq") only attempt to parse the augmented_x
+  # if all orders are in c("ym", "my", "yq") only attempt to parse the augmented_x
   if (all(orders %in% c("ym", "my", "yq"))) {
     parse_attempt_exprs_list$processed_x <- list()
   }
@@ -302,7 +301,7 @@ attempt_parsing <- function(x,
 }
 
 build_strptime_exprs <- function(x, formats) {
-  # returning an empty list helps when iterating with build_strptime_exprs
+  # returning an empty list helps when iterating
   if (is.null(x)) {
     return(list())
   }
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 76ba7034d75..348326da084 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2041,7 +2041,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
       collect(),
     tibble(
      ymd_ims_string =
-       c("67-01-09 12:34:56", "1970-05-22 11:13:59", "870822101359", NA)
+       c("67-01-09 9:34:56", "1970-05-22 10:13:59", "870822111359", NA)
    )
   )
 

From 0e539d898fb048621bec6849738125b2bbc408e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 24 May 2022 13:54:51 +0100
Subject: [PATCH 23/47] add support and unit tests for `"qy"` order + document
 `process_data_for_parsing()`

---
 r/R/dplyr-datetime-helpers.R                 | 55 ++++++++++++++++++--
 r/R/dplyr-funcs-datetime.R                   |  2 +-
 r/tests/testthat/test-dplyr-funcs-datetime.R | 20 ++++++-
 3 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index abc8801d512..767da018d6f 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -154,7 +154,9 @@ binding_as_date_numeric <- function(x, origin = "1970-01-01") {
 
 build_formats <- function(orders) {
   # only keep the letters and the underscore as separator -> allow the users to
-  # pass strptime-like formats (with "%"). Processing is needed (instead of passing
+  # pass strptime-like formats (with "%"). We process the data -> we need to
+  # process the `orders` (even if supplied in the desired format)
+  # Processing is needed (instead of passing
   # formats as-is) due to the processing of the character vector in parse_date_time()
   orders <- gsub("[^A-Za-z]", "", orders)
   orders <- gsub("Y", "y", orders)
@@ -179,6 +181,12 @@ build_formats <- function(orders) {
     orders <- unique(c(orders1, orders2))
   }
 
+  if (any(orders == "qy")) {
+    orders1 <- setdiff(orders, "qy")
+    orders2 <- "ymd"
+    orders <- unique(c(orders1, orders2))
+  }
+
   ymd_orders <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym")
   ymd_hms_orders <- c(
     "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", "mdy_HMS",
@@ -190,10 +198,10 @@ build_formats <- function(orders) {
   supported_orders <- c(
     ymd_orders,
     ymd_hms_orders,
-    gsub("_", " ", ymd_hms_orders), # allow "_", " " and "" as separators
+    gsub("_", " ", ymd_hms_orders), # allow "_", " " and "" as order separators
     gsub("_", "", ymd_hms_orders),
     ymd_ims_orders,
-    gsub("_", " ", ymd_ims_orders), # allow "_", " " and "" as separators
+    gsub("_", " ", ymd_ims_orders), # allow "_", " " and "" as order separators
     gsub("_", "", ymd_ims_orders)
   )
 
@@ -236,9 +244,36 @@ build_format_from_order <- function(order) {
   c(formats_with_sep, formats_without_sep)
 }
 
+#' Process data in preparation for parsing
+#'
+#' `process_data_for_parsing()` takes a data column and a vector of `orders` and
+#' prepares several versions of the input data:
+#'   * `processed_x` is a version of `x` where all separators were replaced with
+#'  `"-"` and multiple separators were collapsed into a single one. This element
+#'  is only set to an empty list when the `orders` argument indicate we're only
+#'  interested in parsing the augmented version of `x`.
+#'  * each of the other 3 elements augment `x` in some way
+#'    * `augmented_x_ym` - augments the `ym` and `my` formats by adding `"01"`
+#'    (to indicate the first day of the month)
+#'    * `augmented_x_yq` - transforms the `yq` format to `ymd`, by deriving the
+#'    first month of the quarter and adding `"01"` to indicate the first day
+#'    * `augmented_x_qy` - transforms the `qy` format to `ymd` in a similar
+#'    manner to `"yq"`
+#'
+#' @param x an Expression corresponding to a character or numeric vector of
+#' dates to be parsed.
+#' @param orders a character vector of date-time formats.
+#'
+#' @return a list made up of 4 list, each a different version of x:
+#'  * `processed_x`
+#'  * `augmented_x_ym`
+#'  * `augmented_x_yq`
+#'  * `augmented_x_qy`
+#' @noRd
 process_data_for_parsing <- function(x,
                                      orders) {
 
+  # browser()
   processed_x <- x$cast(string())
 
   # make all separators (non-letters and non-numbers) into "-"
@@ -276,9 +311,21 @@ process_data_for_parsing <- function(x,
     augmented_x_yq <- call_binding("paste0", year_x, "-", month_x, "-01")
   }
 
+  augmented_x_qy <- NULL
+  if (any(orders == "qy")) {
+    quarter_x <- call_binding("gsub", "-.*$", "", processed_x)
+    quarter_x <- quarter_x$cast(int32())
+    year_x <- call_binding("gsub", "^.*?-", "", processed_x)
+    # year might be missing the final 0s when extracted from a float
+    year_x <- call_binding("str_pad", year_x, width = 4, side = "right", pad = "0")
+    month_x <- (quarter_x - 1) * 3 + 1
+    augmented_x_qy <- call_binding("paste0", year_x, "-", month_x, "-01")
+  }
+
   list(
     "augmented_x_ym" = augmented_x_ym,
     "augmented_x_yq" = augmented_x_yq,
+    "augmented_x_qy" = augmented_x_qy,
     "processed_x" = processed_x
   )
 }
@@ -293,7 +340,7 @@ attempt_parsing <- function(x,
   parse_attempt_exprs_list <- map(processed_data, build_strptime_exprs, formats)
 
   # if all orders are in c("ym", "my", "yq") only attempt to parse the augmented_x
-  if (all(orders %in% c("ym", "my", "yq"))) {
+  if (all(orders %in% c("ym", "my", "yq", "qy"))) {
     parse_attempt_exprs_list$processed_x <- list()
   }
 
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index a9d89e283e8..33c02f781c7 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -528,7 +528,7 @@ register_bindings_datetime_parsers <- function() {
       arrow_not_supported("`quiet = FALSE`")
     }
 
-    if (truncated != 0) {
+    if (truncated > 0) {
       if (truncated > (nchar(orders) - 3)) {
         arrow_not_supported(paste0("a value for `truncated` > ", nchar(orders) - 3))
       }
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 348326da084..567c59a7748 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1790,9 +1790,22 @@ test_that("ym, my & yq parsers", {
     my_string = c("05-2022", "02/2022", "03.22", "12//1979", "09.88", NA),
     yq_string = c("2007.3", "1970.2", "2020.1", "2009.4", "1975.1", NA),
     yq_numeric = c(2007.3, 1970.2, 2020.1, 2009.4, 1975.1, NA),
-    yq_space = c("2007 3", "1970 2", "2020 1", "2009 4", "1975 1", NA)
+    yq_space = c("2007 3", "1970 2", "2020 1", "2009 4", "1975 1", NA),
+    qy_string = c("3.2007", "2.1970", "1.2020", "4.2009", "1.1975", NA),
+    qy_numeric = c(3.2007, 2.1970, 1.2020, 4.2009, 1.1975, NA),
+    qy_space = c("3 2007", "2 1970", "1 2020", "4 2009", "1 1975", NA)
   )
 
+  test_df %>%
+    # arrow_table() %>%
+    mutate(
+      # qy_date_from_string = parse_date_time(qy_string, orders = "qy"),
+      qy_date_from_numeric = parse_date_time(as.character(qy_numeric), orders = "qy"),
+      # qy_date_from_string_with_space = parse_date_time(qy_space, orders = "qy"),
+      .keep = "used"
+    ) %>%
+    collect()
+
   # these functions' internals use some string processing which requires the
   # RE2 library (not available on Windows with R 3.6)
   skip_if_not_available("re2")
@@ -1813,7 +1826,10 @@ test_that("ym, my & yq parsers", {
         my_date2 = parse_date_time(my_string, orders = c("my", "myd")),
         yq_date_from_string2 = parse_date_time(yq_string, orders = "yq"),
         yq_date_from_numeric2 = parse_date_time(yq_numeric, orders = "yq"),
-        yq_date_from_string_with_space2 = parse_date_time(yq_space, orders = "yq")
+        yq_date_from_string_with_space2 = parse_date_time(yq_space, orders = "yq"),
+        qy_date_from_string = parse_date_time(qy_string, orders = "qy"),
+        qy_date_from_numeric = parse_date_time(qy_numeric, orders = "qy"),
+        qy_date_from_string_with_space = parse_date_time(qy_space, orders = "qy")
       ) %>%
       collect(),
     test_df

From 63acafc2cb79693e244f02e9d3379ea9fa80bbcb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 24 May 2022 13:57:12 +0100
Subject: [PATCH 24/47] unit test clean-up

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 567c59a7748..0f6e9587cf7 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1796,16 +1796,6 @@ test_that("ym, my & yq parsers", {
     qy_space = c("3 2007", "2 1970", "1 2020", "4 2009", "1 1975", NA)
   )
 
-  test_df %>%
-    # arrow_table() %>%
-    mutate(
-      # qy_date_from_string = parse_date_time(qy_string, orders = "qy"),
-      qy_date_from_numeric = parse_date_time(as.character(qy_numeric), orders = "qy"),
-      # qy_date_from_string_with_space = parse_date_time(qy_space, orders = "qy"),
-      .keep = "used"
-    ) %>%
-    collect()
-
   # these functions' internals use some string processing which requires the
   # RE2 library (not available on Windows with R 3.6)
   skip_if_not_available("re2")

From b492c30bf400650013ab02e40b122bf78cd92a94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 24 May 2022 14:53:22 +0100
Subject: [PATCH 25/47] comments

---
 r/R/dplyr-datetime-helpers.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 767da018d6f..2e0b22d9b55 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -316,7 +316,8 @@ process_data_for_parsing <- function(x,
     quarter_x <- call_binding("gsub", "-.*$", "", processed_x)
     quarter_x <- quarter_x$cast(int32())
     year_x <- call_binding("gsub", "^.*?-", "", processed_x)
-    # year might be missing the final 0s when extracted from a float
+    # year might be missing the final 0s when extracted from a float, hence the
+    # need to pad
     year_x <- call_binding("str_pad", year_x, width = 4, side = "right", pad = "0")
     month_x <- (quarter_x - 1) * 3 + 1
     augmented_x_qy <- call_binding("paste0", year_x, "-", month_x, "-01")
@@ -339,7 +340,8 @@ attempt_parsing <- function(x,
 
   parse_attempt_exprs_list <- map(processed_data, build_strptime_exprs, formats)
 
-  # if all orders are in c("ym", "my", "yq") only attempt to parse the augmented_x
+  # if all orders are in c("ym", "my", "yq", "qy") only attempt to parse the
+  # augmented version(s) of x
   if (all(orders %in% c("ym", "my", "yq", "qy"))) {
     parse_attempt_exprs_list$processed_x <- list()
   }

From 163d895f7f999621a3f0b4d2c083221179874a98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 24 May 2022 14:55:02 +0100
Subject: [PATCH 26/47] comment + removed `browser()`

---
 r/R/dplyr-datetime-helpers.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 2e0b22d9b55..6a13cb08990 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -273,7 +273,6 @@ build_format_from_order <- function(order) {
 process_data_for_parsing <- function(x,
                                      orders) {
 
-  # browser()
   processed_x <- x$cast(string())
 
   # make all separators (non-letters and non-numbers) into "-"
@@ -311,6 +310,8 @@ process_data_for_parsing <- function(x,
     augmented_x_yq <- call_binding("paste0", year_x, "-", month_x, "-01")
   }
 
+  # same as for `yq`, we need to derive the month from the quarter and add a
+  # "01" to give us the first day of the month
   augmented_x_qy <- NULL
   if (any(orders == "qy")) {
     quarter_x <- call_binding("gsub", "-.*$", "", processed_x)

From 856f4027f531ec7a6666608ae2b4f20172764832 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 24 May 2022 17:00:34 +0100
Subject: [PATCH 27/47] test `"IMS"` with hour greater than 12

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 0f6e9587cf7..59babb4f213 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2047,7 +2047,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
       collect(),
     tibble(
      ymd_ims_string =
-       c("67-01-09 9:34:56", "1970-05-22 10:13:59", "870822111359", NA)
+       c("67-01-09 9:34:56", "1970-05-22 10:13:59", "870822171359", NA)
    )
   )
 

From 56ad69c22b08e7c400a857975ee3fc4db1ebf3a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 24 May 2022 17:06:09 +0100
Subject: [PATCH 28/47] comment + reflow

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 59babb4f213..a9f6aedc122 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2043,7 +2043,15 @@ test_that("parse_date_time with hours, minutes and seconds components", {
   # test ymd_ims
   compare_dplyr_binding(
     .input %>%
-      mutate(ymd_ims_dttm = parse_date_time(ymd_ims_string, orders = "ymd_IMS")) %>%
+      mutate(
+        ymd_ims_dttm =
+          parse_date_time(
+            ymd_ims_string,
+            orders = "ymd_IMS",
+            # we add this since lubridate is chatty and will warn 1 format failed to parse
+            quiet = TRUE
+          )
+      ) %>%
       collect(),
     tibble(
      ymd_ims_string =

From 8aefd2c94502e641794ace80d94b7dfaee0fbea3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Wed, 25 May 2022 09:05:27 +0100
Subject: [PATCH 29/47] make the ims string unanbiguous

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index a9f6aedc122..fa1f4d807f8 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2055,7 +2055,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
       collect(),
     tibble(
      ymd_ims_string =
-       c("67-01-09 9:34:56", "1970-05-22 10:13:59", "870822171359", NA)
+       c("67-01-09 9:34:56", "1970-05-22 10:13:59", "19870822171359", NA)
    )
   )
 

From 600a6ff491e520cfe82bcdd82db16bf72321bbbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Wed, 25 May 2022 09:07:11 +0100
Subject: [PATCH 30/47] comment

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index fa1f4d807f8..8fd3e2d9cba 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2048,7 +2048,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
           parse_date_time(
             ymd_ims_string,
             orders = "ymd_IMS",
-            # we add this since lubridate is chatty and will warn 1 format failed to parse
+            # lubridate is chatty and will warn 1 format failed to parse
             quiet = TRUE
           )
       ) %>%

From c5ee8ad1b9bc87bda5e9ec84e759a63ba2bd6062 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Wed, 25 May 2022 10:37:09 +0100
Subject: [PATCH 31/47] docs + change in approach if `x` has or doesn't have a
 `"-"` separator

---
 r/R/dplyr-datetime-helpers.R | 69 ++++++++++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 7 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 6a13cb08990..f1ec81d2fdd 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -239,9 +239,10 @@ build_format_from_order <- function(order) {
   split_order <- strsplit(order, split = "")[[1]]
 
   outcome <- expand.grid(char_list[split_order])
+  # return only formats with "-" separator, which will be removed during parsing
+  # if the string to be parsed does not contain a separator
   formats_with_sep <- do.call(paste, c(outcome, sep = "-"))
-  formats_without_sep <- do.call(paste, c(outcome, sep = ""))
-  c(formats_with_sep, formats_without_sep)
+  formats_with_sep
 }
 
 #' Process data in preparation for parsing
@@ -253,7 +254,7 @@ build_format_from_order <- function(order) {
 #'  is only set to an empty list when the `orders` argument indicate we're only
 #'  interested in parsing the augmented version of `x`.
 #'  * each of the other 3 elements augment `x` in some way
-#'    * `augmented_x_ym` - augments the `ym` and `my` formats by adding `"01"`
+#'    * `augmented_x_ym` - builds the `ym` and `my` formats by adding `"01"`
 #'    (to indicate the first day of the month)
 #'    * `augmented_x_yq` - transforms the `yq` format to `ymd`, by deriving the
 #'    first month of the quarter and adding `"01"` to indicate the first day
@@ -332,13 +333,30 @@ process_data_for_parsing <- function(x,
   )
 }
 
+
+#' Attempt parsing
+#'
+#' This function does several things:
+#'   * builds all possible `formats` from the supplied `orders`
+#'   * processes the data with `process_data_for_parsing()`
+#'   * build a list of the possible `strptime` Expressions for the data & formats
+#'   combinations
+#'
+#' @inheritParams process_data_for_parsing
+#'
+#' @return a list of `strptime` Expressions we can use with `coalesce`
+#' @noRd
 attempt_parsing <- function(x,
                             orders) {
   # translate orders into possible formats
   formats <- build_formats(orders)
 
+  # depending on the orders argument we need to do some processing to the input
+  # data
   processed_data <- process_data_for_parsing(x, orders)
 
+  # build a list of expressions for parsing each processed_data element and
+  # format combination
   parse_attempt_exprs_list <- map(processed_data, build_strptime_exprs, formats)
 
   # if all orders are in c("ym", "my", "yq", "qy") only attempt to parse the
@@ -347,21 +365,58 @@ attempt_parsing <- function(x,
     parse_attempt_exprs_list$processed_x <- list()
   }
 
+  # we need the output to be a list of expressions (currently it is a list of
+  # lists of expressions due to the shape of the processed data. we have one list
+  # of expressions for each element of/ list in processed_data) -> we need to
+  # remove a level of hierarchy from the list
   purrr::flatten(parse_attempt_exprs_list)
 }
 
+#' Build `strptime` expressions
+#'
+#' This function takes several `formats`, iterates over them and builds a
+#' `strptime` Expression for each of them. Given these Expressions are evaluated
+#' row-wise we can leverage this behaviour and introduce a condition. If `x` has
+#' a separator, use the `format` as is, if it doesn't have a separator, remove
+#' the `"-"` separator from the `format`.
+#'
+#' @param x an Expression corresponding to a character or numeric vector of
+#' dates to be parsed.
+#' @param formats a vector of formats as returned by `build_format_from_order`
+#'
+#' @return a list of Expressions
+#' @noRd
 build_strptime_exprs <- function(x, formats) {
   # returning an empty list helps when iterating
   if (is.null(x)) {
     return(list())
   }
 
+  # if x has a separator ("-") use the format as-is (i.e. with separator)
+  # if not, remove the separator
   map(
     formats,
-    ~ build_expr(
-      "strptime",
-      x,
-      options = list(format = .x, unit = 0L, error_is_null = TRUE)
+    ~ call_binding(
+      "if_else",
+      call_binding("grepl", "-", x),
+      build_expr(
+        "strptime",
+        x,
+        options = list(
+          format = .x,
+          unit = 0L,
+          error_is_null = TRUE
+        )
+      ),
+      build_expr(
+        "strptime",
+        x,
+        options = list(
+          format = gsub("-", "", .x),
+          unit = 0L,
+          error_is_null = TRUE
+        )
+      )
     )
   )
 }

From e8bc62ddf7ff82598929614cca2c19b7e4240554 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Wed, 25 May 2022 12:44:57 +0100
Subject: [PATCH 32/47] update tests

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 8fd3e2d9cba..24a74592e26 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2176,6 +2176,9 @@ test_that("parse_date_time & `exact = TRUE`", {
     y = c("11/23/1998 07:00:00", "6/18/1952 0135", "2/25/1974 0523", "9/07/1985 01", NA)
   )
 
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
+  skip_if_not_available("re2")
   compare_dplyr_binding(
     .input %>%
       mutate(

From aa616e543d9f714e44f0f31681e4b9027253308a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 26 May 2022 11:42:58 +0100
Subject: [PATCH 33/47] regular R objects as input + tests

---
 r/R/dplyr-funcs-datetime.R                   |  4 ++++
 r/tests/testthat/test-dplyr-funcs-datetime.R | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 33c02f781c7..5c3f3f4c764 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -536,6 +536,10 @@ register_bindings_datetime_parsers <- function() {
       orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x))
     }
 
+    if (!inherits(x, "Expression")) {
+      x <- Expression$scalar(x)
+    }
+
     if (exact == TRUE) {
       # no data processing takes place & we don't derive formats
       parse_attempts <- build_strptime_exprs(x, orders)
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 24a74592e26..8b19a6e9f11 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2199,3 +2199,16 @@ test_that("parse_date_time & `exact = TRUE`", {
     test_df
   )
 })
+
+test_that("parse_date_time with R objects", {
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        b = parse_date_time("2022-12-31 12:59:59", orders = "ymd_HMS")
+      ) %>%
+      collect(),
+    tibble(
+      a = 1
+    )
+  )
+})

From 2ddcd5e4c2987cb4884e148c63a718c0f1abff5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 26 May 2022 11:45:39 +0100
Subject: [PATCH 34/47] merged test_that blocks

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 8b19a6e9f11..7de48927efa 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2170,7 +2170,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
   )
 })
 
-test_that("parse_date_time & `exact = TRUE`", {
+test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
   test_df <- tibble(
     x = c("2022-12-31 12:59:59", "2022-01-01 12:11", "2022-01-01 12", "2022-01-01", NA),
     y = c("11/23/1998 07:00:00", "6/18/1952 0135", "2/25/1974 0523", "9/07/1985 01", NA)
@@ -2198,9 +2198,6 @@ test_that("parse_date_time & `exact = TRUE`", {
       collect(),
     test_df
   )
-})
-
-test_that("parse_date_time with R objects", {
   compare_dplyr_binding(
     .input %>%
       mutate(

From ecfd70ab63f7c0b0ee94fee76521d349270c3f71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 6 Jun 2022 10:15:31 +0100
Subject: [PATCH 35/47] added some tests for `build_formats()` (not finished)

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 88 ++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 7de48927efa..64489645afa 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2209,3 +2209,91 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
     )
   )
 })
+
+test_that("build_formats", {
+  # TODO finish adding tests for build_formats
+  expect_equal(
+    build_formats(c("ym", "myd", "%Y-%d-%m")),
+    c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d",
+      "%b-%Y-%d", "%y-%d-%m", "%Y-%d-%m", "%y-%d-%B", "%Y-%d-%B", "%y-%d-%b",
+      "%Y-%d-%b", "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d",
+      "%Y-%b-%d")
+  )
+  expect_error(build_formats("abd"))
+#   a
+#   Abbreviated weekday name in the current locale. (Also matches full name)
+#
+#   A
+#   Full weekday name in the current locale. (Also matches abbreviated name).
+#
+#   You don't need to specify a and A formats explicitly. Wday is automatically handled if preproc_wday = TRUE
+#
+# b (!)
+# Abbreviated or full month name in the current locale. The C parser currently understands only English month names.
+#
+# B (!)
+# Same as b.
+#
+# d (!)
+# Day of the month as decimal number (01–31 or 0–31)
+#
+# H (!)
+# Hours as decimal number (00–24 or 0–24).
+#
+# I (!)
+# Hours as decimal number (01–12 or 1–12).
+#
+# j
+# Day of year as decimal number (001–366 or 1–366).
+#
+# q (!*)
+# Quarter (1–4). The quarter month is added to the parsed month if m element is present.
+#
+# m (!*)
+# Month as decimal number (01–12 or 1–12). For parse_date_time also matches abbreviated and full months names as b and B formats. C parser understands only English month names.
+#
+# M (!)
+# Minute as decimal number (00–59 or 0–59).
+#
+# p (!)
+# AM/PM indicator in the locale. Commonly used in conjunction with I and not with H. But lubridate's C parser accepts H format as long as hour is not greater than 12. C parser understands only English locale AM/PM indicator.
+#
+#   S (!)
+#   Second as decimal number (00–61 or 0–61), allowing for up to two leap-seconds (but POSIX-compliant implementations will ignore leap seconds).
+#
+#   OS
+#   Fractional second.
+#
+#   U
+#   Week of the year as decimal number (00–53 or 0–53) using Sunday as the first day 1 of the week (and typically with the first Sunday of the year as day 1 of week 1). The US convention.
+#
+#   w
+#   Weekday as decimal number (0–6, Sunday is 0).
+#
+#   W
+#   Week of the year as decimal number (00–53 or 0–53) using Monday as the first day of week (and typically with the first Monday of the year as day 1 of week 1). The UK convention.
+#
+#   y (!*)
+#   Year without century (00–99 or 0–99). In parse_date_time() also matches year with century (Y format).
+#
+#   Y (!)
+#   Year with century.
+#
+#   z (!*)
+#   ISO8601 signed offset in hours and minutes from UTC. For example -0800, -08:00 or -08, all represent 8 hours behind UTC. This format also matches the Z (Zulu) UTC indicator. Because base::strptime() doesn't fully support ISO8601 this format is implemented as an union of 4 formats: Ou (Z), Oz (-0800), OO (-08:00) and Oo (-08). You can use these formats as any other but it is rarely necessary. parse_date_time2() and fast_strptime() support all of these formats.
+#
+# Om (!*)
+# Matches numeric month and English alphabetic months (Both, long and abbreviated forms).
+#
+# Op (!*)
+# Matches AM/PM English indicator.
+#
+# r (*)
+# Matches Ip and H orders.
+#
+# R (*)
+# Matches HM andIMp orders.
+#
+# T (*)
+# Matches IMSp, HMS, and HMOS orders.
+})

From f93e038b011854f8c487fc6b08efeef6de330cf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 20 Jun 2022 16:14:03 +0100
Subject: [PATCH 36/47] update comment

---
 r/R/dplyr-funcs-datetime.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 5c3f3f4c764..8ecb80b6b45 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -549,9 +549,9 @@ register_bindings_datetime_parsers <- function() {
 
     coalesce_output <- build_expr("coalesce", args = parse_attempts)
 
-    # we need this binding to be able to handle a NULL `tz`, which will then be
-    # used by bindings such as `ymd` to return, based on whether tz is NULL or
-    # not, a date or timestamp
+    # we need this binding to be able to handle a NULL `tz`, which, in turn,
+    # will be used by bindings such as `ymd()` to return a date or timestamp,
+    # based on whether tz is NULL or not
     if (!is.null(tz)) {
       build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
     } else {

From 323fe4feeef0d28fa75b549a71c0268a26b3fc11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 20 Jun 2022 16:18:35 +0100
Subject: [PATCH 37/47] add a first battery of tests for `build_formats()`

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 39 +++++++++++++++++++-
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 64489645afa..f1360f0a8b3 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2211,7 +2211,6 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
 })
 
 test_that("build_formats", {
-  # TODO finish adding tests for build_formats
   expect_equal(
     build_formats(c("ym", "myd", "%Y-%d-%m")),
     c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d",
@@ -2219,7 +2218,43 @@ test_that("build_formats", {
       "%Y-%d-%b", "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d",
       "%Y-%b-%d")
   )
-  expect_error(build_formats("abd"))
+
+  # when order is one of "yq", "qy", "ym" or"my" the data is augmented to "ymd"
+  # or "ydm" and the formats are built accordingly
+  ymd_formats <- c(
+    "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d"
+  )
+  expect_equal(
+    build_formats("yq"),
+    ymd_formats
+  )
+
+  expect_equal(
+    build_formats("ym"),
+    ymd_formats
+  )
+
+  expect_equal(
+    build_formats("qy"),
+    ymd_formats
+  )
+
+  # build formats will output unique formats
+  expect_equal(
+    build_formats(c("yq", "ym", "qy")),
+    ymd_formats
+  )
+
+  expect_equal(
+    build_formats("my"),
+    c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d", "%b-%Y-%d")
+  )
+
+  # ab not supported yet
+  expect_error(
+    build_formats("abd"),
+    '"abd" `orders` not supported in Arrow'
+  )
 #   a
 #   Abbreviated weekday name in the current locale. (Also matches full name)
 #

From 4115f41cd34dde9cdcc9d4f7973375fe49f7ea46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 20 Jun 2022 16:33:49 +0100
Subject: [PATCH 38/47] handle `Yq`, `qY`, `mY` and `Ym` + unit tests

---
 r/R/dplyr-datetime-helpers.R                 |  6 +++---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 19 ++++++++++++++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index f1ec81d2fdd..7cba7ac7068 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -285,7 +285,7 @@ process_data_for_parsing <- function(x,
   # for `ym` and `my` orders we add a day ("01")
   # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16627
   augmented_x_ym <- NULL
-  if (any(orders %in% c("ym", "my"))) {
+  if (any(orders %in% c("ym", "my", "Ym", "mY"))) {
     # add day as "-01" if there is a "-" separator and as "01" if not
     augmented_x_ym <- call_binding(
       "if_else",
@@ -298,7 +298,7 @@ process_data_for_parsing <- function(x,
   # for `yq` we need to transform the quarter into the start month (lubridate
   # behaviour) and then add 01 to parse to the first day of the quarter
   augmented_x_yq <- NULL
-  if (any(orders == "yq")) {
+  if (any(orders %in% c("yq", "Yq"))) {
     # extract everything that comes after the `-` separator, i.e. the quarter
     # (e.g. 4 from 2022-4)
     quarter_x <- call_binding("gsub", "^.*?-", "", processed_x)
@@ -314,7 +314,7 @@ process_data_for_parsing <- function(x,
   # same as for `yq`, we need to derive the month from the quarter and add a
   # "01" to give us the first day of the month
   augmented_x_qy <- NULL
-  if (any(orders == "qy")) {
+  if (any(orders %in% c("qy", "qY"))) {
     quarter_x <- call_binding("gsub", "-.*$", "", processed_x)
     quarter_x <- quarter_x$cast(int32())
     year_x <- call_binding("gsub", "^.*?-", "", processed_x)
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index f1360f0a8b3..60279b580f4 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1788,6 +1788,8 @@ test_that("ym, my & yq parsers", {
   test_df <- tibble::tibble(
     ym_string = c("2022-05", "2022/02", "22.03", "1979//12", "88.09", NA),
     my_string = c("05-2022", "02/2022", "03.22", "12//1979", "09.88", NA),
+    Ym_string = c("2022-05", "2022/02", "2022.03", "1979//12", "1988.09", NA),
+    mY_string = c("05-2022", "02/2022", "03.2022", "12//1979", "09.1988", NA),
     yq_string = c("2007.3", "1970.2", "2020.1", "2009.4", "1975.1", NA),
     yq_numeric = c(2007.3, 1970.2, 2020.1, 2009.4, 1975.1, NA),
     yq_space = c("2007 3", "1970 2", "2020 1", "2009 4", "1975 1", NA),
@@ -1804,8 +1806,12 @@ test_that("ym, my & yq parsers", {
       mutate(
         ym_date = ym(ym_string),
         ym_datetime = ym(ym_string, tz = "Pacific/Marquesas"),
+        Ym_date = ym(Ym_string),
+        Ym_datetime = ym(Ym_string, tz = "Pacific/Marquesas"),
         my_date = my(my_string),
         my_datetime = my(my_string, tz = "Pacific/Marquesas"),
+        mY_date = my(mY_string),
+        mY_datetime = my(mY_string, tz = "Pacific/Marquesas"),
         yq_date_from_string = yq(yq_string),
         yq_datetime_from_string = yq(yq_string, tz = "Pacific/Marquesas"),
         yq_date_from_numeric = yq(yq_numeric),
@@ -1814,12 +1820,23 @@ test_that("ym, my & yq parsers", {
         yq_datetime_from_string_with_space = yq(yq_space, tz = "Pacific/Marquesas"),
         ym_date2 = parse_date_time(ym_string, orders = c("ym", "ymd")),
         my_date2 = parse_date_time(my_string, orders = c("my", "myd")),
+        Ym_date2 = parse_date_time(Ym_string, orders = c("Ym", "ymd")),
+        mY_date2 = parse_date_time(mY_string, orders = c("mY", "myd")),
         yq_date_from_string2 = parse_date_time(yq_string, orders = "yq"),
         yq_date_from_numeric2 = parse_date_time(yq_numeric, orders = "yq"),
         yq_date_from_string_with_space2 = parse_date_time(yq_space, orders = "yq"),
+        # testing with Yq
+        yq_date_from_string3 = parse_date_time(yq_string, orders = "Yq"),
+        yq_date_from_numeric3 = parse_date_time(yq_numeric, orders = "Yq"),
+        yq_date_from_string_with_space3 = parse_date_time(yq_space, orders = "Yq"),
+        # testing with qy
         qy_date_from_string = parse_date_time(qy_string, orders = "qy"),
         qy_date_from_numeric = parse_date_time(qy_numeric, orders = "qy"),
-        qy_date_from_string_with_space = parse_date_time(qy_space, orders = "qy")
+        qy_date_from_string_with_space = parse_date_time(qy_space, orders = "qy"),
+        # testing with qY
+        qy_date_from_string2 = parse_date_time(qy_string, orders = "qY"),
+        qy_date_from_numeric2 = parse_date_time(qy_numeric, orders = "qY"),
+        qy_date_from_string_with_space2 = parse_date_time(qy_space, orders = "qY")
       ) %>%
       collect(),
     test_df

From c48425ff79bac74e11253787cab1289d35e034da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Mon, 20 Jun 2022 17:13:31 +0100
Subject: [PATCH 39/47] clean-up

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 76 --------------------
 1 file changed, 76 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 60279b580f4..0e707bd3871 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2272,80 +2272,4 @@ test_that("build_formats", {
     build_formats("abd"),
     '"abd" `orders` not supported in Arrow'
   )
-#   a
-#   Abbreviated weekday name in the current locale. (Also matches full name)
-#
-#   A
-#   Full weekday name in the current locale. (Also matches abbreviated name).
-#
-#   You don't need to specify a and A formats explicitly. Wday is automatically handled if preproc_wday = TRUE
-#
-# b (!)
-# Abbreviated or full month name in the current locale. The C parser currently understands only English month names.
-#
-# B (!)
-# Same as b.
-#
-# d (!)
-# Day of the month as decimal number (01–31 or 0–31)
-#
-# H (!)
-# Hours as decimal number (00–24 or 0–24).
-#
-# I (!)
-# Hours as decimal number (01–12 or 1–12).
-#
-# j
-# Day of year as decimal number (001–366 or 1–366).
-#
-# q (!*)
-# Quarter (1–4). The quarter month is added to the parsed month if m element is present.
-#
-# m (!*)
-# Month as decimal number (01–12 or 1–12). For parse_date_time also matches abbreviated and full months names as b and B formats. C parser understands only English month names.
-#
-# M (!)
-# Minute as decimal number (00–59 or 0–59).
-#
-# p (!)
-# AM/PM indicator in the locale. Commonly used in conjunction with I and not with H. But lubridate's C parser accepts H format as long as hour is not greater than 12. C parser understands only English locale AM/PM indicator.
-#
-#   S (!)
-#   Second as decimal number (00–61 or 0–61), allowing for up to two leap-seconds (but POSIX-compliant implementations will ignore leap seconds).
-#
-#   OS
-#   Fractional second.
-#
-#   U
-#   Week of the year as decimal number (00–53 or 0–53) using Sunday as the first day 1 of the week (and typically with the first Sunday of the year as day 1 of week 1). The US convention.
-#
-#   w
-#   Weekday as decimal number (0–6, Sunday is 0).
-#
-#   W
-#   Week of the year as decimal number (00–53 or 0–53) using Monday as the first day of week (and typically with the first Monday of the year as day 1 of week 1). The UK convention.
-#
-#   y (!*)
-#   Year without century (00–99 or 0–99). In parse_date_time() also matches year with century (Y format).
-#
-#   Y (!)
-#   Year with century.
-#
-#   z (!*)
-#   ISO8601 signed offset in hours and minutes from UTC. For example -0800, -08:00 or -08, all represent 8 hours behind UTC. This format also matches the Z (Zulu) UTC indicator. Because base::strptime() doesn't fully support ISO8601 this format is implemented as an union of 4 formats: Ou (Z), Oz (-0800), OO (-08:00) and Oo (-08). You can use these formats as any other but it is rarely necessary. parse_date_time2() and fast_strptime() support all of these formats.
-#
-# Om (!*)
-# Matches numeric month and English alphabetic months (Both, long and abbreviated forms).
-#
-# Op (!*)
-# Matches AM/PM English indicator.
-#
-# r (*)
-# Matches Ip and H orders.
-#
-# R (*)
-# Matches HM andIMp orders.
-#
-# T (*)
-# Matches IMSp, HMS, and HMOS orders.
 })

From b8c690095a93facec883e510cfa58cbf844348af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 21 Jun 2022 13:04:13 +0100
Subject: [PATCH 40/47] switch to the previous implementation, where
 `build_format_from_order()` returns both "-" separated and non-separated
 formats in a single vector

---
 r/R/dplyr-datetime-helpers.R | 44 ++++++++++++------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 7cba7ac7068..84a87de76a7 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -172,7 +172,7 @@ build_formats <- function(orders) {
     orders1 <- setdiff(orders, short_orders)
     orders2 <- intersect(orders, short_orders)
     orders2 <- paste0(orders2, "d")
-    orders <- unique(c(orders1, orders2))
+    orders <- unique(c(orders2, orders1))
   }
 
   if (any(orders == "yq")) {
@@ -239,10 +239,14 @@ build_format_from_order <- function(order) {
   split_order <- strsplit(order, split = "")[[1]]
 
   outcome <- expand.grid(char_list[split_order])
-  # return only formats with "-" separator, which will be removed during parsing
-  # if the string to be parsed does not contain a separator
+  # we combine formats with and without the "-" separator, we will later
+  # coalesce through all of them (benchmarking indicated this is a more
+  # computationally efficient approach rather than figuring out if a string has
+  # separators or not and applying only )
+  # during parsing if the string to be parsed does not contain a separator
   formats_with_sep <- do.call(paste, c(outcome, sep = "-"))
-  formats_with_sep
+  formats_without_sep <- do.call(paste, c(outcome, sep = ""))
+  c(formats_with_sep, formats_without_sep)
 }
 
 #' Process data in preparation for parsing
@@ -352,7 +356,8 @@ attempt_parsing <- function(x,
   formats <- build_formats(orders)
 
   # depending on the orders argument we need to do some processing to the input
-  # data
+  # data. `process_data_for_parsing()` uses the passed `orders` and not the
+  # derived `formats`
   processed_data <- process_data_for_parsing(x, orders)
 
   # build a list of expressions for parsing each processed_data element and
@@ -361,7 +366,7 @@ attempt_parsing <- function(x,
 
   # if all orders are in c("ym", "my", "yq", "qy") only attempt to parse the
   # augmented version(s) of x
-  if (all(orders %in% c("ym", "my", "yq", "qy"))) {
+  if (all(orders %in% c("ym", "Ym", "my", "mY", "yq", "Yq", "qy", "qY"))) {
     parse_attempt_exprs_list$processed_x <- list()
   }
 
@@ -392,31 +397,12 @@ build_strptime_exprs <- function(x, formats) {
     return(list())
   }
 
-  # if x has a separator ("-") use the format as-is (i.e. with separator)
-  # if not, remove the separator
   map(
     formats,
-    ~ call_binding(
-      "if_else",
-      call_binding("grepl", "-", x),
-      build_expr(
-        "strptime",
-        x,
-        options = list(
-          format = .x,
-          unit = 0L,
-          error_is_null = TRUE
-        )
-      ),
-      build_expr(
-        "strptime",
-        x,
-        options = list(
-          format = gsub("-", "", .x),
-          unit = 0L,
-          error_is_null = TRUE
-        )
-      )
+    ~ build_expr(
+      "strptime",
+      x,
+      options = list(format = .x, unit = 0L, error_is_null = TRUE)
     )
   )
 }

From 13eac86e466beb3b745e9c29cf6e8465103bd796 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 21 Jun 2022 13:05:29 +0100
Subject: [PATCH 41/47] updated `build_formats()` unit tests

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 23 +++++++++++++-------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 0e707bd3871..9865f068d70 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1786,7 +1786,7 @@ test_that("year, month, day date/time parsers", {
 
 test_that("ym, my & yq parsers", {
   test_df <- tibble::tibble(
-    ym_string = c("2022-05", "2022/02", "22.03", "1979//12", "88.09", NA),
+    ym_string = c("2022-05", "2022/02", "22.3", "1979//12", "88.09", NA),
     my_string = c("05-2022", "02/2022", "03.22", "12//1979", "09.88", NA),
     Ym_string = c("2022-05", "2022/02", "2022.03", "1979//12", "1988.09", NA),
     mY_string = c("05-2022", "02/2022", "03.2022", "12//1979", "09.1988", NA),
@@ -2230,17 +2230,23 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
 test_that("build_formats", {
   expect_equal(
     build_formats(c("ym", "myd", "%Y-%d-%m")),
-    c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d",
-      "%b-%Y-%d", "%y-%d-%m", "%Y-%d-%m", "%y-%d-%B", "%Y-%d-%B", "%y-%d-%b",
-      "%Y-%d-%b", "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d",
-      "%Y-%b-%d")
+    c(
+      # formats from "ym" order
+      "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d",
+      "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d",
+      # formats from "myd" order
+      "%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d", "%b-%Y-%d",
+      "%m%y%d", "%B%y%d", "%b%y%d", "%m%Y%d", "%B%Y%d", "%b%Y%d",
+      # formats from "%Y-%d-%m" format
+      "%y-%d-%m", "%Y-%d-%m", "%y-%d-%B", "%Y-%d-%B", "%y-%d-%b", "%Y-%d-%b",
+      "%y%d%m", "%Y%d%m", "%y%d%B", "%Y%d%B", "%y%d%b", "%Y%d%b")
   )
 
   # when order is one of "yq", "qy", "ym" or"my" the data is augmented to "ymd"
   # or "ydm" and the formats are built accordingly
   ymd_formats <- c(
-    "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d"
-  )
+    "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d",
+    "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d")
   expect_equal(
     build_formats("yq"),
     ymd_formats
@@ -2264,7 +2270,8 @@ test_that("build_formats", {
 
   expect_equal(
     build_formats("my"),
-    c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d", "%b-%Y-%d")
+    c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d", "%b-%Y-%d",
+      "%m%y%d", "%B%y%d", "%b%y%d", "%m%Y%d", "%B%Y%d", "%b%Y%d")
   )
 
   # ab not supported yet

From 7d2d8568a68b38a9b74cbc8c492d0fde676f5468 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 21 Jun 2022 13:27:35 +0100
Subject: [PATCH 42/47] document `build_formats()` and
 `build_format_from_order()`

---
 r/R/dplyr-datetime-helpers.R | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 84a87de76a7..fb8e724448d 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -152,6 +152,16 @@ binding_as_date_numeric <- function(x, origin = "1970-01-01") {
   x
 }
 
+#' Build formats from multiple orders
+#'
+#' This function is a vectorisation of `build_format_from_order()`. It also
+#' checks if the supplied orders are currently supported.
+#'
+#' @inheritParams process_data_for_parsing
+#'
+#' @return a vector of unique formats
+#'
+#' @noRd
 build_formats <- function(orders) {
   # only keep the letters and the underscore as separator -> allow the users to
   # pass strptime-like formats (with "%"). We process the data -> we need to
@@ -225,6 +235,14 @@ build_formats <- function(orders) {
   unique(formats)
 }
 
+#' Build formats from a single order
+#'
+#' @param order a single string date-time format, such as `"ymd"` or `"ymd_hms"`
+#'
+#' @return a vector of all possible formats derived from the input
+#' order
+#'
+#' @noRd
 build_format_from_order <- function(order) {
   char_list <- list(
     "y" = c("%y", "%Y"),
@@ -269,7 +287,7 @@ build_format_from_order <- function(order) {
 #' dates to be parsed.
 #' @param orders a character vector of date-time formats.
 #'
-#' @return a list made up of 4 list, each a different version of x:
+#' @return a list made up of 4 lists, each a different version of x:
 #'  * `processed_x`
 #'  * `augmented_x_ym`
 #'  * `augmented_x_yq`
@@ -387,7 +405,8 @@ attempt_parsing <- function(x,
 #'
 #' @param x an Expression corresponding to a character or numeric vector of
 #' dates to be parsed.
-#' @param formats a vector of formats as returned by `build_format_from_order`
+#' @param formats a character vector of formats as returned by
+#' `build_format_from_order`
 #'
 #' @return a list of Expressions
 #' @noRd

From 4311ae6e15250ee094c5a55fcafa5880a29361ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 21 Jun 2022 13:28:09 +0100
Subject: [PATCH 43/47] more unit tests for `build_formats()` and
 `build_format_from_order()`

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 29 +++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 9865f068d70..77975929ec3 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2227,7 +2227,7 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
   )
 })
 
-test_that("build_formats", {
+test_that("build_formats() and build_format_from_order()", {
   expect_equal(
     build_formats(c("ym", "myd", "%Y-%d-%m")),
     c(
@@ -2242,6 +2242,14 @@ test_that("build_formats", {
       "%y%d%m", "%Y%d%m", "%y%d%B", "%Y%d%B", "%y%d%b", "%Y%d%b")
   )
 
+  expect_equal(
+    build_formats("ymd_HMS"),
+    c("%y-%m-%d-%H-%M-%S", "%Y-%m-%d-%H-%M-%S", "%y-%B-%d-%H-%M-%S",
+      "%Y-%B-%d-%H-%M-%S", "%y-%b-%d-%H-%M-%S", "%Y-%b-%d-%H-%M-%S",
+      "%y%m%d%H%M%S", "%Y%m%d%H%M%S", "%y%B%d%H%M%S", "%Y%B%d%H%M%S",
+      "%y%b%d%H%M%S", "%Y%b%d%H%M%S")
+  )
+
   # when order is one of "yq", "qy", "ym" or"my" the data is augmented to "ymd"
   # or "ydm" and the formats are built accordingly
   ymd_formats <- c(
@@ -2279,4 +2287,23 @@ test_that("build_formats", {
     build_formats("abd"),
     '"abd" `orders` not supported in Arrow'
   )
+
+  expect_error(
+    build_formats("vup"),
+    '"vup" `orders` not supported in Arrow'
+  )
+
+  expect_equal(
+    build_format_from_order("ymd"),
+    c("%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d",
+      "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d")
+  )
+
+  expect_equal(
+    build_format_from_order("ymdHMS"),
+    c("%y-%m-%d-%H-%M-%S", "%Y-%m-%d-%H-%M-%S", "%y-%B-%d-%H-%M-%S",
+      "%Y-%B-%d-%H-%M-%S", "%y-%b-%d-%H-%M-%S", "%Y-%b-%d-%H-%M-%S",
+      "%y%m%d%H%M%S", "%Y%m%d%H%M%S", "%y%B%d%H%M%S", "%Y%B%d%H%M%S",
+      "%y%b%d%H%M%S", "%Y%b%d%H%M%S")
+  )
 })

From 423eb50f8b233202f2f4a6949f8da553ee358d43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Tue, 21 Jun 2022 13:30:35 +0100
Subject: [PATCH 44/47] update

---
 r/R/dplyr-datetime-helpers.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index fb8e724448d..83d8ae97683 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -154,8 +154,9 @@ binding_as_date_numeric <- function(x, origin = "1970-01-01") {
 
 #' Build formats from multiple orders
 #'
-#' This function is a vectorisation of `build_format_from_order()`. It also
-#' checks if the supplied orders are currently supported.
+#' This function is a vectorised version of `build_format_from_order()`. In
+#' addition to `build_format_from_order()`, it also checks if the supplied
+#' orders are currently supported.
 #'
 #' @inheritParams process_data_for_parsing
 #'

From 4a5e62ff1891af2dae848d9370f563be999d9ba5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 23 Jun 2022 14:18:33 +0100
Subject: [PATCH 45/47] style

---
 r/R/dplyr-datetime-helpers.R | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 83d8ae97683..60771c86241 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -294,8 +294,7 @@ build_format_from_order <- function(order) {
 #'  * `augmented_x_yq`
 #'  * `augmented_x_qy`
 #' @noRd
-process_data_for_parsing <- function(x,
-                                     orders) {
+process_data_for_parsing <- function(x, orders) {
 
   processed_x <- x$cast(string())
 
@@ -369,8 +368,7 @@ process_data_for_parsing <- function(x,
 #'
 #' @return a list of `strptime` Expressions we can use with `coalesce`
 #' @noRd
-attempt_parsing <- function(x,
-                            orders) {
+attempt_parsing <- function(x, orders) {
   # translate orders into possible formats
   formats <- build_formats(orders)
 

From b1f35ffed7ea02b46768d95520f1eca85b5446cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 23 Jun 2022 14:29:53 +0100
Subject: [PATCH 46/47] reorganised unit tests

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 167 +++++++++----------
 1 file changed, 80 insertions(+), 87 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 77975929ec3..5ec613c2f89 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2025,33 +2025,23 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     test_dates_times
   )
 
+  # parse_date_time with timezone
+  pm_tz <- "Pacific/Marquesas"
   compare_dplyr_binding(
     .input %>%
       mutate(
-        ymd_hms_dttm =
-          parse_date_time(ymd_hms_string, orders = "ymd_HMS", tz = "Pacific/Marquesas"),
-        ymd_hm_dttm =
-          parse_date_time(ymd_hm_string, orders = "ymd_HM", tz = "Pacific/Marquesas"),
-        ymd_h_dttm =
-          parse_date_time(ymd_h_string, orders = "ymd_H", tz = "Pacific/Marquesas"),
-        dmy_hms_dttm =
-          parse_date_time(dmy_hms_string, orders = "dmy_HMS", tz = "Pacific/Marquesas"),
-        dmy_hm_dttm =
-          parse_date_time(dmy_hm_string, orders = "dmy_HM", tz = "Pacific/Marquesas"),
-        dmy_h_dttm =
-          parse_date_time(dmy_h_string, orders = "dmy_H", tz = "Pacific/Marquesas"),
-        mdy_hms_dttm =
-          parse_date_time(mdy_hms_string, orders = "mdy_HMS", tz = "Pacific/Marquesas"),
-        mdy_hm_dttm =
-          parse_date_time(mdy_hm_string, orders = "mdy_HM", tz = "Pacific/Marquesas"),
-        mdy_h_dttm =
-          parse_date_time(mdy_h_string, orders = "mdy_H", tz = "Pacific/Marquesas"),
-        ydm_hms_dttm =
-          parse_date_time(ydm_hms_string, orders = "ydm_HMS", tz = "Pacific/Marquesas"),
-        ydm_hm_dttm =
-          parse_date_time(ydm_hm_string, orders = "ydm_HM", tz = "Pacific/Marquesas"),
-        ydm_h_dttm =
-          parse_date_time(ydm_h_string, orders = "ydm_H", tz = "Pacific/Marquesas")
+        ymd_hms_dttm = parse_date_time(ymd_hms_string, orders = "ymd_HMS", tz = pm_tz),
+        ymd_hm_dttm = parse_date_time(ymd_hm_string, orders = "ymd_HM", tz = pm_tz),
+        ymd_h_dttm = parse_date_time(ymd_h_string, orders = "ymd_H", tz = pm_tz),
+        dmy_hms_dttm = parse_date_time(dmy_hms_string, orders = "dmy_HMS", tz = pm_tz),
+        dmy_hm_dttm = parse_date_time(dmy_hm_string, orders = "dmy_HM", tz = pm_tz),
+        dmy_h_dttm = parse_date_time(dmy_h_string, orders = "dmy_H", tz = pm_tz),
+        mdy_hms_dttm = parse_date_time(mdy_hms_string, orders = "mdy_HMS", tz = pm_tz),
+        mdy_hm_dttm = parse_date_time(mdy_hm_string, orders = "mdy_HM", tz = pm_tz),
+        mdy_h_dttm = parse_date_time(mdy_h_string, orders = "mdy_H", tz = pm_tz),
+        ydm_hms_dttm = parse_date_time(ydm_hms_string, orders = "ydm_HMS", tz = pm_tz),
+        ydm_hm_dttm = parse_date_time(ydm_hm_string, orders = "ydm_HM", tz = pm_tz),
+        ydm_h_dttm = parse_date_time(ydm_h_string, orders = "ydm_H", tz = pm_tz)
       ) %>%
       collect(),
     test_dates_times
@@ -2075,65 +2065,12 @@ test_that("parse_date_time with hours, minutes and seconds components", {
        c("67-01-09 9:34:56", "1970-05-22 10:13:59", "19870822171359", NA)
    )
   )
+})
 
-  # test truncated formats
-  test_truncation_df <-  tibble(
-    truncated_ymd_string =
-      c(
-        "2022-05-19 13:46:51",
-        "2022-05-18 13:46",
-        "2022-05-17 13",
-        "2022-05-16"
-      )
-  )
-
-  compare_dplyr_binding(
-    .input %>%
-      mutate(
-        dttm =
-          parse_date_time(
-            truncated_ymd_string,
-            orders = "ymd_HMS",
-            truncated = 3
-          )
-      ) %>%
-      collect(),
-    test_truncation_df
-  )
-
-  # values for truncated greater than nchar(orders) - 3 not supported in Arrow
-  compare_dplyr_binding(
-    .input %>%
-      mutate(
-        dttm =
-          parse_date_time(
-            truncated_ymd_string,
-            orders = "ymd_HMS",
-            truncated = 5
-          )
-      ) %>%
-      collect(),
-    test_truncation_df,
-    warning = "a value for `truncated` > 4 not supported in Arrow"
-  )
-
-
-  # we need expect_warning twice as both the arrow pipeline (because quiet =
-  # FALSE is not supported) and the fallback dplyr/lubridate one throw
-  # warnings (the lubridate one because quiet is FALSE)
-  expect_warning(
-    expect_warning(
-      tibble(x = c("2022-05-19 13:46:51")) %>%
-        arrow_table() %>%
-        mutate(
-          x_dttm = parse_date_time(x, orders = "dmy_HMS", quiet = FALSE)
-        ) %>%
-        collect(),
-      "`quiet = FALSE` not supported in Arrow"
-    ),
-    "All formats failed to parse"
-  )
-
+test_that("parse_date_time with month names and HMS", {
+  # locale (affecting "%b% and "%B" formats) does not work properly on Windows
+  # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16443 is done
+  skip_on_os("windows")
   test_dates_times2 <- tibble(
     ymd_hms_string =
       c("67-Jan-09 12:34:56", "1970-June-22 20:13:59", "87Aug22201359", NA),
@@ -2160,11 +2097,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     ydm_h_string =
       c("67-09-Jan 12", "1970-22-June 20", "8722Aug20", NA)
   )
-  # the unseparated strings are versions of "1987-08-22 20:13:59" (with %y)
-
-  # locale (affecting "%b% and "%B" formats) does not work properly on Windows
-  # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16443 is done
-  skip_on_os("windows")
+  # the un-separated strings are versions of "1987-08-22 20:13:59" (with %y)
 
   compare_dplyr_binding(
     .input %>%
@@ -2187,6 +2120,66 @@ test_that("parse_date_time with hours, minutes and seconds components", {
   )
 })
 
+test_that("parse_date_time with `quiet = FALSE` not supported", {
+  # we need expect_warning twice as both the arrow pipeline (because quiet =
+  # FALSE is not supported) and the fallback dplyr/lubridate one throw
+  # warnings (the lubridate one because quiet is FALSE)
+  expect_warning(
+    expect_warning(
+      tibble(x = c("2022-05-19 13:46:51")) %>%
+        arrow_table() %>%
+        mutate(
+          x_dttm = parse_date_time(x, orders = "dmy_HMS", quiet = FALSE)
+        ) %>%
+        collect(),
+      "`quiet = FALSE` not supported in Arrow"
+    ),
+    "All formats failed to parse"
+  )
+})
+
+test_that("parse_date_time with truncated formats", {
+  test_truncation_df <-  tibble(
+    truncated_ymd_string =
+      c(
+        "2022-05-19 13:46:51",
+        "2022-05-18 13:46",
+        "2022-05-17 13",
+        "2022-05-16"
+      )
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        dttm =
+          parse_date_time(
+            truncated_ymd_string,
+            orders = "ymd_HMS",
+            truncated = 3
+          )
+      ) %>%
+      collect(),
+    test_truncation_df
+  )
+
+  # values for truncated greater than nchar(orders) - 3 not supported in Arrow
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        dttm =
+          parse_date_time(
+            truncated_ymd_string,
+            orders = "ymd_HMS",
+            truncated = 5
+          )
+      ) %>%
+      collect(),
+    test_truncation_df,
+    warning = "a value for `truncated` > 4 not supported in Arrow"
+  )
+})
+
 test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
   test_df <- tibble(
     x = c("2022-12-31 12:59:59", "2022-01-01 12:11", "2022-01-01 12", "2022-01-01", NA),

From 5fc561fdc6b5bfc83ba707353ac7d406b9e42b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 23 Jun 2022 15:22:50 +0100
Subject: [PATCH 47/47] skip test on win & R 3.6

---
 r/tests/testthat/test-dplyr-funcs-datetime.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 5ec613c2f89..15af0c9f8db 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2139,6 +2139,10 @@ test_that("parse_date_time with `quiet = FALSE` not supported", {
 })
 
 test_that("parse_date_time with truncated formats", {
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
+  skip_if_not_available("re2")
+
   test_truncation_df <-  tibble(
     truncated_ymd_string =
       c(