diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 27d6e889199..92181296b3f 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -414,6 +414,29 @@ nse_funcs$pmax <- function(..., na.rm = FALSE) {
   )
 }
 
+# Binding for str_pad(): pads `string` to `width` with `pad` on the requested
+# `side` by dispatching to the utf8_lpad / utf8_rpad / utf8_center kernels.
+# `width` must be integer-like and `pad` a single string; both are passed
+# through as the `width` and `padding` options of the chosen kernel.
+nse_funcs$str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ") {
+  assert_that(is_integerish(width))
+  side <- match.arg(side)
+  assert_that(is.string(pad))
+
+  # match.arg() guarantees `side` is exactly one of the three choices below
+  pad_func <- switch(side,
+    left = "utf8_lpad",
+    right = "utf8_rpad",
+    both = "utf8_center"
+  )
+
+  Expression$create(
+    pad_func,
+    string,
+    options = list(width = width, padding = pad)
+  )
+}
+
 # String function helpers
 
 # format `pattern` as needed for case insensitivity and literal matching by RE2
diff --git a/r/R/expression.R b/r/R/expression.R
index de140832374..9b4b79e458a 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -30,6 +30,7 @@
   "str_length" = "utf8_length",
   "str_to_lower" = "utf8_lower",
   "str_to_upper" = "utf8_upper",
+  # str_pad is defined in dplyr-functions.R
   "str_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
   "year" = "year",
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 458e0e386e9..8d0f0793549 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -285,6 +285,15 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_splits, reverse);
   }
 
+  // The UTF-8 and ASCII pad/center kernels all take PadOptions(width, padding).
+  if (func_name == "utf8_lpad" || func_name == "utf8_rpad" ||
+      func_name == "utf8_center" || func_name == "ascii_lpad" ||
+      func_name == "ascii_rpad" || func_name == "ascii_center") {
+    using Options = arrow::compute::PadOptions;
+    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["width"]),
+                                     cpp11::as_cpp<std::string>(options["padding"]));
+  }
+
   if (func_name == "utf8_split_whitespace" || func_name == "ascii_split_whitespace") {
     using Options = arrow::compute::SplitOptions;
     int64_t max_splits = -1;
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index ecbe2f00f2d..438f1038e57 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -866,3 +866,47 @@ test_that("str_like", {
     df
   )
 })
+
+test_that("str_pad", {
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+  # default side ("left") and default pad (" ")
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 31)) %>%
+      collect(),
+    df
+  )
+
+  # pad on the right
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 30, side = "right")) %>%
+      collect(),
+    df
+  )
+
+  # custom pad character
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 31, side = "left", pad = "+")) %>%
+      collect(),
+    df
+  )
+
+  # width shorter than every input string
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 10, side = "left", pad = "+")) %>%
+      collect(),
+    df
+  )
+
+  # pad on both sides
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 31, side = "both")) %>%
+      collect(),
+    df
+  )
+})