From 1ead9c5f580bac8593c5f3452e4c6ab06fcc4ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 11 Feb 2022 12:55:50 +0000 Subject: [PATCH 01/13] add unit test to show the different returns from strptime with mismatched format --- r/tests/testthat/test-dplyr-funcs-datetime.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 6df40505d1a..565d2cbe97b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -118,6 +118,17 @@ test_that("errors in strptime", { ) }) +test_that("strptime returns NA when format doesn't match the data", { + df <- tibble(a = "2022-02-07") + + compare_dplyr_binding( + .input %>% + mutate(b = strptime(a, format = "%Y %m-%d")) %>% + collect(), + df + ) +}) + test_that("strftime", { times <- tibble( datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), From 962ca1bfb8587364d872b3d34b38b77fe1ea56a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 25 Mar 2022 10:21:02 +0000 Subject: [PATCH 02/13] `strptime` returns `NA` / `NULL` instead of error --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c583aed5472..5ff0b043996 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -40,7 +40,7 @@ register_bindings_datetime <- function() { unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) - Expression$create("strptime", x, options = list(format = format, unit = unit)) + Expression$create("strptime", x, options = list(format = format, unit = unit, error_is_null = TRUE)) }) register_binding("strftime", function(x, format = "", tz = "", usetz = FALSE) { From 48eee0af76ff3260d1fb84ce8d11927086913566 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 25 Mar 2022 10:32:11 +0000 Subject: [PATCH 03/13] add failing unit test --- r/tests/testthat/test-dplyr-funcs-datetime.R | 38 +++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 565d2cbe97b..2b00f5caec5 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -119,14 +119,42 @@ test_that("errors in strptime", { }) test_that("strptime returns NA when format doesn't match the data", { - df <- tibble(a = "2022-02-07") + df <- tibble(str_date = c("2022-02-07", "2022 02-07")) - compare_dplyr_binding( - .input %>% - mutate(b = strptime(a, format = "%Y %m-%d")) %>% + # "2022 02-07 10:12:14" + + expect_equal( + df %>% + arrow_table() %>% + mutate(parsed_date = strptime(str_date, format = "%Y-%m-%d")) %>% collect(), - df + tibble( + str_date = c("2022-02-07", "2022 02-07"), + parsed_date = as.POSIXct(c("2022-02-07 00:00:00", NA)) + ), + ignore_attr = TRUE ) + + + # something is weird when the Ym separator is something else than a hyphen + expect_equal( + df %>% + arrow_table() %>% + mutate(parsed_date = strptime(str_date, format = "%Y %m-%d")) %>% + collect(), + tibble( + str_date = c("2022-02-07", "2022 02-07"), + parsed_date = as.POSIXct(c(NA, "2022-02-07 00:00:00")) + ), + ignore_attr = TRUE + ) + + # compare_dplyr_binding( + # .input %>% + # mutate(b = strptime(str_date, format = "%Y %m-%d")) %>% + # collect(), + # df + # ) }) test_that("strftime", { From 4894e8b7c758f538b7fa775d185fd63819d76dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 10:30:02 +0100 Subject: [PATCH 04/13] added a C++ unit test to try and replicate the weird behavior I noticed in R --- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 5332082f4c9..0f5501ce7aa 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1845,10 +1845,12 @@ TYPED_TEST(TestStringKernels, Strptime) { std::string input3 = R"(["5/1/2020", "AA/BB/CCCC"])"; std::string input4 = R"(["5/1/2020", "AA/BB/CCCC", "AA/BB/CCCC", "AA/BB/CCCC", null])"; std::string input5 = R"(["5/1/2020 %z", null, null, "12/13/1900 %z", null])"; + std::string input6 = R"(["2022-02-07", "1989 12-22"])"; std::string output1 = R"(["2020-05-01", null, null, "1900-12-13", null])"; std::string output2 = R"([null, "1900-12-13"])"; std::string output3 = R"(["2020-05-01", null])"; std::string output4 = R"(["2020-01-05", null, null, null, null])"; + std::string output6 = R"([null, "1989-12-22"])"; StrptimeOptions options("%m/%d/%Y", TimeUnit::MICRO, /*error_is_null=*/true); auto unit = timestamp(TimeUnit::MICRO); @@ -1862,6 +1864,9 @@ TYPED_TEST(TestStringKernels, Strptime) { options.format = "%m/%d/%Y %%z"; this->CheckUnary("strptime", input5, unit, output1, &options); + options.format = "%Y %m-%d"; + this->CheckUnary("strptime", input6, unit, output6, &options); + options.error_is_null = false; this->CheckUnary("strptime", input5, unit, output1, &options); From 10708d7e3759c31e92393ba82df3b5d70c9cf195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 11:31:16 +0100 Subject: [PATCH 05/13] extend the C++ test --- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 0f5501ce7aa..f24e4641741 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1845,12 +1845,12 @@ 
TYPED_TEST(TestStringKernels, Strptime) { std::string input3 = R"(["5/1/2020", "AA/BB/CCCC"])"; std::string input4 = R"(["5/1/2020", "AA/BB/CCCC", "AA/BB/CCCC", "AA/BB/CCCC", null])"; std::string input5 = R"(["5/1/2020 %z", null, null, "12/13/1900 %z", null])"; - std::string input6 = R"(["2022-02-07", "1989 12-22"])"; + std::string input6 = R"(["2022-02-07", "2012/03-28", "1975/01-02", "1981/01-07"])"; std::string output1 = R"(["2020-05-01", null, null, "1900-12-13", null])"; std::string output2 = R"([null, "1900-12-13"])"; std::string output3 = R"(["2020-05-01", null])"; std::string output4 = R"(["2020-01-05", null, null, null, null])"; - std::string output6 = R"([null, "1989-12-22"])"; + std::string output6 = R"([null, "2012-03-28", "1975-01-02", "1981-01-07"])"; StrptimeOptions options("%m/%d/%Y", TimeUnit::MICRO, /*error_is_null=*/true); auto unit = timestamp(TimeUnit::MICRO); @@ -1864,7 +1864,7 @@ TYPED_TEST(TestStringKernels, Strptime) { options.format = "%m/%d/%Y %%z"; this->CheckUnary("strptime", input5, unit, output1, &options); - options.format = "%Y %m-%d"; + options.format = "%Y/%m-%d"; this->CheckUnary("strptime", input6, unit, output6, &options); options.error_is_null = false; From 3d56e03abb62e89241432f7f0c7cb709ff125657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 13:23:26 +0100 Subject: [PATCH 06/13] unit test for the `"%m/%d/%Y %z"` format --- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index f24e4641741..a790cb09053 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1846,6 +1846,7 @@ TYPED_TEST(TestStringKernels, Strptime) { std::string input4 = R"(["5/1/2020", "AA/BB/CCCC", "AA/BB/CCCC", "AA/BB/CCCC", null])"; std::string input5 = R"(["5/1/2020 %z", 
null, null, "12/13/1900 %z", null])"; std::string input6 = R"(["2022-02-07", "2012/03-28", "1975/01-02", "1981/01-07"])"; + std::string input7 = R"(["02-07-2022", "03/28/2012", "01/02/1975", "01/07/1981"])"; std::string output1 = R"(["2020-05-01", null, null, "1900-12-13", null])"; std::string output2 = R"([null, "1900-12-13"])"; std::string output3 = R"(["2020-05-01", null])"; @@ -1867,6 +1868,9 @@ TYPED_TEST(TestStringKernels, Strptime) { options.format = "%Y/%m-%d"; this->CheckUnary("strptime", input6, unit, output6, &options); + options.format = "%m/%d/%Y %%z"; + this->CheckUnary("strptime", input7, unit, output6, &options); + options.error_is_null = false; this->CheckUnary("strptime", input5, unit, output1, &options); From 96108a55e2396168801140398b1e431f7174f240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 14:09:24 +0100 Subject: [PATCH 07/13] test --- r/tests/testthat/test-dplyr-funcs-datetime.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 2b00f5caec5..81ce766526c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -121,8 +121,6 @@ test_that("errors in strptime", { test_that("strptime returns NA when format doesn't match the data", { df <- tibble(str_date = c("2022-02-07", "2022 02-07")) - # "2022 02-07 10:12:14" - expect_equal( df %>% arrow_table() %>% @@ -136,7 +134,7 @@ test_that("strptime returns NA when format doesn't match the data", { ) - # something is weird when the Ym separator is something else than a hyphen + # something is weird when the first element fails to parse expect_equal( df %>% arrow_table() %>% From 870ca53de7a935c6ade546ea6d1446180feb8b17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 15:59:27 +0100 Subject: [PATCH 08/13] cleaned-up tests --- 
r/tests/testthat/test-dplyr-funcs-datetime.R | 22 +++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 81ce766526c..cd205f79856 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -120,6 +120,9 @@ test_that("errors in strptime", { test_that("strptime returns NA when format doesn't match the data", { df <- tibble(str_date = c("2022-02-07", "2022 02-07")) + df <- tibble( + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07") + ) expect_equal( df %>% @@ -127,32 +130,23 @@ test_that("strptime returns NA when format doesn't match the data", { mutate(parsed_date = strptime(str_date, format = "%Y-%m-%d")) %>% collect(), tibble( - str_date = c("2022-02-07", "2022 02-07"), - parsed_date = as.POSIXct(c("2022-02-07 00:00:00", NA)) + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), + parsed_date = as.POSIXct(c("2022-02-07 00:00:00", NA, NA, NA)) ), ignore_attr = TRUE ) - - # something is weird when the first element fails to parse expect_equal( df %>% arrow_table() %>% - mutate(parsed_date = strptime(str_date, format = "%Y %m-%d")) %>% + mutate(parsed_date = strptime(str_date, format = "%Y/%m-%d")) %>% collect(), tibble( - str_date = c("2022-02-07", "2022 02-07"), - parsed_date = as.POSIXct(c(NA, "2022-02-07 00:00:00")) + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), + parsed_date = as.POSIXct(c(NA, "2012-02-07", "1975-01-02", "1981-01-07")) ), ignore_attr = TRUE ) - - # compare_dplyr_binding( - # .input %>% - # mutate(b = strptime(str_date, format = "%Y %m-%d")) %>% - # collect(), - # df - # ) }) test_that("strftime", { From cb4ecd52da706e25a60ecf98f44583014818abd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 16:02:12 +0100 Subject: [PATCH 09/13] use `build_expr()` to 
support regular R objects too --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 5ff0b043996..754d02a4368 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -40,7 +40,7 @@ register_bindings_datetime <- function() { unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) - Expression$create("strptime", x, options = list(format = format, unit = unit, error_is_null = TRUE)) + build_expr("strptime", x, options = list(format = format, unit = unit, error_is_null = TRUE)) }) register_binding("strftime", function(x, format = "", tz = "", usetz = FALSE) { From c696fb7b48e2349fc2cb1fa796c4b4e5509fe7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 28 Mar 2022 16:20:55 +0100 Subject: [PATCH 10/13] added unit test for R objects --- r/tests/testthat/test-dplyr-funcs-datetime.R | 22 ++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index cd205f79856..235f4873bd0 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -119,11 +119,25 @@ test_that("errors in strptime", { }) test_that("strptime returns NA when format doesn't match the data", { - df <- tibble(str_date = c("2022-02-07", "2022 02-07")) df <- tibble( str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07") ) + expect_equal( + df %>% + arrow_table() %>% + mutate( + r_obj_parsed_date = strptime("03-27/2022", format = "%m-%d/%Y"), + r_obj_parsed_na = strptime("03-27/2022", format = "Y%-%m-%d")) %>% + collect(), + tibble( + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), + r_obj_parsed_date = as.POSIXct(rep("2022-03-27", 4)), + r_obj_parsed_na = as.POSIXct(rep(NA, 4)) + ), + ignore_attr = "tzone" + ) + expect_equal( 
df %>% arrow_table() %>% @@ -131,9 +145,9 @@ test_that("strptime returns NA when format doesn't match the data", { collect(), tibble( str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), - parsed_date = as.POSIXct(c("2022-02-07 00:00:00", NA, NA, NA)) + parsed_date = as.POSIXct(c("2022-02-07", NA, NA, NA)) ), - ignore_attr = TRUE + ignore_attr = "tzone" ) expect_equal( @@ -145,7 +159,7 @@ test_that("strptime returns NA when format doesn't match the data", { str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), parsed_date = as.POSIXct(c(NA, "2012-02-07", "1975-01-02", "1981-01-07")) ), - ignore_attr = TRUE + ignore_attr = "tzone" ) }) From e10bb1e3026f23a1ee43f64a6325da06baddba87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 29 Mar 2022 14:20:15 +0100 Subject: [PATCH 11/13] removed the C++ unit tests --- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index a790cb09053..5332082f4c9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1845,13 +1845,10 @@ TYPED_TEST(TestStringKernels, Strptime) { std::string input3 = R"(["5/1/2020", "AA/BB/CCCC"])"; std::string input4 = R"(["5/1/2020", "AA/BB/CCCC", "AA/BB/CCCC", "AA/BB/CCCC", null])"; std::string input5 = R"(["5/1/2020 %z", null, null, "12/13/1900 %z", null])"; - std::string input6 = R"(["2022-02-07", "2012/03-28", "1975/01-02", "1981/01-07"])"; - std::string input7 = R"(["02-07-2022", "03/28/2012", "01/02/1975", "01/07/1981"])"; std::string output1 = R"(["2020-05-01", null, null, "1900-12-13", null])"; std::string output2 = R"([null, "1900-12-13"])"; std::string output3 = R"(["2020-05-01", null])"; std::string output4 = R"(["2020-01-05", null, null, null, null])"; - std::string output6 = R"([null, 
"2012-03-28", "1975-01-02", "1981-01-07"])"; StrptimeOptions options("%m/%d/%Y", TimeUnit::MICRO, /*error_is_null=*/true); auto unit = timestamp(TimeUnit::MICRO); @@ -1865,12 +1862,6 @@ TYPED_TEST(TestStringKernels, Strptime) { options.format = "%m/%d/%Y %%z"; this->CheckUnary("strptime", input5, unit, output1, &options); - options.format = "%Y/%m-%d"; - this->CheckUnary("strptime", input6, unit, output6, &options); - - options.format = "%m/%d/%Y %%z"; - this->CheckUnary("strptime", input7, unit, output6, &options); - options.error_is_null = false; this->CheckUnary("strptime", input5, unit, output1, &options); From 5d19668d4bab5bdc1ade2f9986875311cbb01931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 29 Mar 2022 20:51:51 +0100 Subject: [PATCH 12/13] testing with NA --- r/tests/testthat/test-dplyr-funcs-datetime.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 235f4873bd0..68206c701c2 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -120,7 +120,7 @@ test_that("errors in strptime", { test_that("strptime returns NA when format doesn't match the data", { df <- tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07") + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA) ) expect_equal( @@ -131,9 +131,9 @@ test_that("strptime returns NA when format doesn't match the data", { r_obj_parsed_na = strptime("03-27/2022", format = "Y%-%m-%d")) %>% collect(), tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), - r_obj_parsed_date = as.POSIXct(rep("2022-03-27", 4)), - r_obj_parsed_na = as.POSIXct(rep(NA, 4)) + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA), + r_obj_parsed_date = as.POSIXct(rep("2022-03-27", 5)), + r_obj_parsed_na = 
as.POSIXct(rep(NA, 5)) ), ignore_attr = "tzone" ) @@ -144,8 +144,8 @@ test_that("strptime returns NA when format doesn't match the data", { mutate(parsed_date = strptime(str_date, format = "%Y-%m-%d")) %>% collect(), tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), - parsed_date = as.POSIXct(c("2022-02-07", NA, NA, NA)) + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA), + parsed_date = as.POSIXct(c("2022-02-07", NA, NA, NA, NA)) ), ignore_attr = "tzone" ) @@ -156,8 +156,8 @@ test_that("strptime returns NA when format doesn't match the data", { mutate(parsed_date = strptime(str_date, format = "%Y/%m-%d")) %>% collect(), tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07"), - parsed_date = as.POSIXct(c(NA, "2012-02-07", "1975-01-02", "1981-01-07")) + str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA), + parsed_date = as.POSIXct(c(NA, "2012-02-07", "1975-01-02", "1981-01-07", NA)) ), ignore_attr = "tzone" ) From f669be7ccb4a8731e3889b16b364b9da71d0b7f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 30 Mar 2022 10:11:01 +0100 Subject: [PATCH 13/13] updated tests to be more `compare_dplyr`-like --- r/tests/testthat/test-dplyr-funcs-datetime.R | 25 +++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 68206c701c2..16e4958f1cc 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -123,6 +123,8 @@ test_that("strptime returns NA when format doesn't match the data", { str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA) ) + # base::strptime() returns a POSIXlt object (a list), while the Arrow binding + # returns a POSIXct (double) vector => we cannot use compare_dplyr_binding() expect_equal( df %>% arrow_table() %>% @@ -130,23 
+132,20 @@ test_that("strptime returns NA when format doesn't match the data", { r_obj_parsed_date = strptime("03-27/2022", format = "%m-%d/%Y"), r_obj_parsed_na = strptime("03-27/2022", format = "Y%-%m-%d")) %>% collect(), - tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA), - r_obj_parsed_date = as.POSIXct(rep("2022-03-27", 5)), - r_obj_parsed_na = as.POSIXct(rep(NA, 5)) - ), + df %>% + mutate( + r_obj_parsed_date = as.POSIXct(strptime("03-27/2022", format = "%m-%d/%Y")), + r_obj_parsed_na = as.POSIXct(strptime("03-27/2022", format = "Y%-%m-%d"))), ignore_attr = "tzone" ) expect_equal( df %>% - arrow_table() %>% + record_batch() %>% mutate(parsed_date = strptime(str_date, format = "%Y-%m-%d")) %>% collect(), - tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA), - parsed_date = as.POSIXct(c("2022-02-07", NA, NA, NA, NA)) - ), + df %>% + mutate(parsed_date = as.POSIXct(strptime(str_date, format = "%Y-%m-%d"))), ignore_attr = "tzone" ) @@ -155,10 +154,8 @@ test_that("strptime returns NA when format doesn't match the data", { arrow_table() %>% mutate(parsed_date = strptime(str_date, format = "%Y/%m-%d")) %>% collect(), - tibble( - str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA), - parsed_date = as.POSIXct(c(NA, "2012-02-07", "1975-01-02", "1981-01-07", NA)) - ), + df %>% + mutate(parsed_date = as.POSIXct(strptime(str_date, format = "%Y/%m-%d"))), ignore_attr = "tzone" ) })