From ccbd3c8024af4baf90cdb7ebf889bf37e04f70f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 13:42:17 +0100
Subject: [PATCH 1/3] added test for str_to_sentence & binding to
 utf8_capitalize

---
 r/R/dplyr-functions.R                      |  5 +++++
 r/tests/testthat/test-dplyr-funcs-string.R | 11 +++++++++++
 2 files changed, 16 insertions(+)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index dbb9d5f46f6..b314f97a450 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -367,6 +367,11 @@ nse_funcs$str_to_title <- function(string, locale = "en") {
   Expression$create("utf8_title", string)
 }
 
+nse_funcs$str_to_sentence <- function(string, locale = "en") {
+  stop_if_locale_provided(locale)
+  Expression$create("utf8_capitalize", string)
+}
+
 nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
   side <- match.arg(side)
   trim_fun <- switch(side,
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index dd59b5ac55d..b0ae394307b 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1336,3 +1336,14 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
     df
   )
 })
+
+test_that("str_to_sentence", {
+  df <- tibble(x = c("first word", "the second word", "the third word"))
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(sentence_case = str_to_sentence(x)) %>%
+      collect(),
+    df
+  )
+})

From e00f21286ecfddef1cadc04abd3b6f3dd3e2b15c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 14:46:00 +0100
Subject: [PATCH 2/3] added more tests and comments (in expression)

---
 r/R/dplyr-functions.R                      |  1 +
 r/R/expression.R                           |  1 +
 r/tests/testthat/test-dplyr-funcs-string.R | 14 ++++++++++++--
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index b314f97a450..1a2bf4d9f65 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -341,6 +341,7 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
 #   str_to_lower
 #   str_to_upper
 #   str_to_title
+#   str_to_sentence
 #
 # Arrow locale will be supported with ARROW-14126
 stop_if_locale_provided <- function(locale) {
diff --git a/r/R/expression.R b/r/R/expression.R
index b1b6635f538..f3110f40ef0 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -53,6 +53,7 @@
   # str_to_lower is defined in dplyr-functions.R
   # str_to_title is defined in dplyr-functions.R
   # str_to_upper is defined in dplyr-functions.R
+  # str_to_sentence is defined in dplyr-functions.R
   # str_trim is defined in dplyr-functions.R
   "stri_reverse" = "utf8_reverse",
   # substr is defined in dplyr-functions.R
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index b0ae394307b..640fb9fa011 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1338,11 +1338,21 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
 })
 
 test_that("str_to_sentence", {
-  df <- tibble(x = c("first word", "the second word", "the third word"))
+  df <- tibble(
+    one_sent = c("first word", "the second word", "the third word"),
+    two_sent = c("first sent. second sent", "second word", "third word")
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(sentence_case = str_to_sentence(one_sent)) %>%
+      collect(),
+    df
+  )
 
   expect_dplyr_equal(
     input %>%
-      mutate(sentence_case = str_to_sentence(x)) %>%
+      mutate(sentence_case_two = str_to_sentence(two_sent)) %>%
       collect(),
     df
   )

From 836fdf897a05bb9870b7360027653567348989a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 14:58:35 +0100
Subject: [PATCH 3/3] added a failing test to capture the weird stringr
 behaviour

---
 r/tests/testthat/test-dplyr-funcs-string.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index 640fb9fa011..f6fc5f313c4 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1340,7 +1340,8 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
 test_that("str_to_sentence", {
   df <- tibble(
     one_sent = c("first word", "the second word", "the third word"),
-    two_sent = c("first sent. second sent", "second word", "third word")
+    two_sent = c("first word. second word? third word! fourth word",
+                 "second word", "third word")
   )
 
   expect_dplyr_equal(
@@ -1350,7 +1351,9 @@ test_that("str_to_sentence", {
     df
   )
 
-  expect_dplyr_equal(
+  # NOTE: stringr::str_to_sentence() does not appear to treat `.` as the end
+  # of a sentence, so the multi-sentence `two_sent` input behaves unexpectedly
+  expect_dplyr_error(
     input %>%
       mutate(sentence_case_two = str_to_sentence(two_sent)) %>%
       collect(),