From 7d961bf0c1c9d863383f5f631b971724a4446dfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Tue, 19 Oct 2021 11:06:54 +0100
Subject: [PATCH 01/12] added tests for str_count

---
 r/tests/testthat/test-dplyr-funcs-string.R | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index dd59b5ac55d..a68de7f5dad 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1336,3 +1336,36 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
     df
   )
 })
+
+test_that("str_count", {
+  df <- tibble(fruit = c("apple", "banana", "pear", "pineapple"))
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(a_count = str_count(fruit, pattern = "a")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(p_count = str_count(fruit, pattern = "p")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(e_count = str_count(fruit, pattern = "e")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(let_count = str_count(fruit, pattern = c("a", "b", "p", "n"))) %>%
+      collect(),
+    df
+  )
+
+})

From 87cbba94b55a181e02bfbce56d57f466f0bdf74b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 09:35:37 +0100
Subject: [PATCH 02/12] more tests and replacing fruits with cities

---
 r/tests/testthat/test-dplyr-funcs-string.R | 40 ++++++++++++++++++----
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index a68de7f5dad..ca712f76e5e 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -126,7 +126,7 @@ test_that("paste, paste0, and str_c", {
   )
   # emits null in str_c() (consistent with stringr::str_c())
   expect_dplyr_equal(
-    input %>%
+    df %>%
       transmute(str_c(x, y, sep = NA_character_)) %>%
       collect(),
     df
@@ -1338,34 +1338,62 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
 })
 
 test_that("str_count", {
-  df <- tibble(fruit = c("apple", "banana", "pear", "pineapple"))
+  df <- tibble(
+    cities = c("Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio",
+               "Cluj Napoca", "Bern", "Bogota"),
+    dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d.."))
 
   expect_dplyr_equal(
     input %>%
-      mutate(a_count = str_count(fruit, pattern = "a")) %>%
+      mutate(a_count = str_count(cities, pattern = "a")) %>%
       collect(),
     df
   )
 
   expect_dplyr_equal(
     input %>%
-      mutate(p_count = str_count(fruit, pattern = "p")) %>%
+      mutate(p_count = str_count(cities, pattern = "d")) %>%
       collect(),
     df
   )
 
   expect_dplyr_equal(
     input %>%
-      mutate(e_count = str_count(fruit, pattern = "e")) %>%
+      mutate(p_count = str_count(cities,
+                                 pattern = regex("d", ignore_case = TRUE))) %>%
       collect(),
     df
   )
 
   expect_dplyr_equal(
     input %>%
-      mutate(let_count = str_count(fruit, pattern = c("a", "b", "p", "n"))) %>%
+      mutate(e_count = str_count(cities, pattern = "u")) %>%
       collect(),
     df
   )
 
+  # nse_funcs$str_count() is not vectorised over pattern
+  expect_dplyr_equal(
+    input %>%
+      mutate(let_count = str_count(
+        cities,
+        pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>%
+      collect(),
+    df,
+    warning = TRUE
+  )
+
+  expect_dplyr_equal(
+   input %>%
+      mutate(dots_count = str_count(dots, ".")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(dots_count = str_count(dots, fixed("."))) %>%
+      collect(),
+    df
+  )
 })

From 32ecb358f7f4de018386559780decb729d220a73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 09:35:51 +0100
Subject: [PATCH 03/12] added the binding for srt_count

---
 r/R/dplyr-functions.R | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index dbb9d5f46f6..518b658927b 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -645,6 +645,27 @@ nse_funcs$str_ends <- function(string, pattern, negate = FALSE) {
   out
 }
 
+nse_funcs$str_count <- function(string, pattern) {
+  opts <- get_stringr_pattern_options(enexpr(pattern))
+  if (length(pattern) > 1) {
+    arrow_not_supported("Pattern argument longer than 1")
+  }
+  if (opts$fixed) {
+    out <- Expression$create(
+      "count_substring",
+      string,
+      options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+    )
+  } else {
+    out <- Expression$create(
+      "count_substring_regex",
+      string,
+      options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+    )
+  }
+  out
+}
+
 # String function helpers
 
 # format `pattern` as needed for case insensitivity and literal matching by RE2

From e8f7801525ebb2a42361353c641e1b6702c16dbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 09:39:06 +0100
Subject: [PATCH 04/12] updated NEWS

---
 r/NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/r/NEWS.md b/r/NEWS.md
index 065a5d792df..43230899787 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,6 +19,8 @@
 
 # arrow 5.0.0.9000
 
+* added bindings for `str_count()`. (ARROW-13156, @dragosmg)
+
 There are now two ways to query Arrow data:
 
 ## 1. Expanded Arrow-native queries: aggregation and joins

From c678cdfb6ae165ea39cf10245956b2eb416caa78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 09:45:54 +0100
Subject: [PATCH 05/12] undo

---
 r/tests/testthat/test-dplyr-funcs-string.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index ca712f76e5e..a945b883d94 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -126,7 +126,7 @@ test_that("paste, paste0, and str_c", {
   )
   # emits null in str_c() (consistent with stringr::str_c())
   expect_dplyr_equal(
-    df %>%
+    input %>%
       transmute(str_c(x, y, sep = NA_character_)) %>%
       collect(),
     df

From 829bccbe30fc44cf020301a727579fffc64ff873 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 10:45:31 +0100
Subject: [PATCH 06/12] Update r/R/dplyr-functions.R

Co-authored-by: Nic <thisisnic@gmail.com>
---
 r/R/dplyr-functions.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 518b658927b..320afe3aa69 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -647,8 +647,8 @@ nse_funcs$str_ends <- function(string, pattern, negate = FALSE) {
 
 nse_funcs$str_count <- function(string, pattern) {
   opts <- get_stringr_pattern_options(enexpr(pattern))
-  if (length(pattern) > 1) {
-    arrow_not_supported("Pattern argument longer than 1")
+  if !is.string(pattern) {
+    arrow_not_supported("`pattern` must be a length 1 character vector; other values")
   }
   if (opts$fixed) {
     out <- Expression$create(

From f819ee42cd984900b737ff244db4f18d8c936bb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 10:52:43 +0100
Subject: [PATCH 07/12] small changes

---
 r/R/dplyr-functions.R | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 320afe3aa69..84a769ba861 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -647,23 +647,26 @@ nse_funcs$str_ends <- function(string, pattern, negate = FALSE) {
 
 nse_funcs$str_count <- function(string, pattern) {
   opts <- get_stringr_pattern_options(enexpr(pattern))
-  if !is.string(pattern) {
+  if (!is.string(pattern)) {
     arrow_not_supported("`pattern` must be a length 1 character vector; other values")
   }
   if (opts$fixed) {
-    out <- Expression$create(
-      "count_substring",
-      string,
-      options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+    return(
+      Expression$create(
+        "count_substring",
+        string,
+        options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+      )
     )
   } else {
-    out <- Expression$create(
-      "count_substring_regex",
-      string,
-      options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+    return(
+      Expression$create(
+        "count_substring_regex",
+        string,
+        options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+      )
     )
   }
-  out
 }
 
 # String function helpers

From 5acad04a9457bde985a5940cd61eec19eafc37cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Wed, 20 Oct 2021 14:45:02 +0100
Subject: [PATCH 08/12] Update r/R/dplyr-functions.R

Co-authored-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/dplyr-functions.R | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 84a769ba861..717cdae9662 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -650,23 +650,12 @@ nse_funcs$str_count <- function(string, pattern) {
   if (!is.string(pattern)) {
     arrow_not_supported("`pattern` must be a length 1 character vector; other values")
   }
-  if (opts$fixed) {
-    return(
-      Expression$create(
-        "count_substring",
-        string,
-        options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
-      )
-    )
-  } else {
-    return(
-      Expression$create(
-        "count_substring_regex",
-        string,
-        options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
-      )
-    )
-  }
+  arrow_fun <- ifelse(opts$fixed, "count_substring", "count_substring_regex")
+  Expression$create(
+    arrow_fun,
+    string,
+    options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+  )
 }
 
 # String function helpers

From a4c75d2e85805a868106f00126fe22497885ffd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Thu, 21 Oct 2021 10:13:27 +0100
Subject: [PATCH 09/12] styled the test file

---
 r/tests/testthat/test-dplyr-funcs-string.R | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index a945b883d94..c649c90c67d 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -791,7 +791,8 @@ test_that("strftime", {
   )
 
   withr::with_timezone(
-    "Pacific/Marquesas", {
+    "Pacific/Marquesas",
+    {
       expect_dplyr_equal(
         input %>%
           mutate(
@@ -1339,9 +1340,12 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
 
 test_that("str_count", {
   df <- tibble(
-    cities = c("Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio",
-               "Cluj Napoca", "Bern", "Bogota"),
-    dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d.."))
+    cities = c(
+      "Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio",
+      "Cluj Napoca", "Bern", "Bogota"
+    ),
+    dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..")
+  )
 
   expect_dplyr_equal(
     input %>%
@@ -1360,7 +1364,8 @@ test_that("str_count", {
   expect_dplyr_equal(
     input %>%
       mutate(p_count = str_count(cities,
-                                 pattern = regex("d", ignore_case = TRUE))) %>%
+        pattern = regex("d", ignore_case = TRUE)
+      )) %>%
       collect(),
     df
   )
@@ -1377,14 +1382,15 @@ test_that("str_count", {
     input %>%
       mutate(let_count = str_count(
         cities,
-        pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>%
+        pattern = c("a", "b", "e", "g", "p", "n", "s")
+      )) %>%
       collect(),
     df,
     warning = TRUE
   )
 
   expect_dplyr_equal(
-   input %>%
+    input %>%
       mutate(dots_count = str_count(dots, ".")) %>%
       collect(),
     df

From a408dd86f3f1023320adf36d52843d9cd4ac00d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= <dragos.mold@gmail.com>
Date: Thu, 21 Oct 2021 10:15:03 +0100
Subject: [PATCH 10/12] tweaked the styling a bit

---
 r/tests/testthat/test-dplyr-funcs-string.R | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index c649c90c67d..f7dcc02cc25 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1340,10 +1340,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
 
 test_that("str_count", {
   df <- tibble(
-    cities = c(
-      "Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio",
-      "Cluj Napoca", "Bern", "Bogota"
-    ),
+    cities = c("Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio", "Cluj Napoca", "Bern", "Bogota"),
     dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..")
   )
 
@@ -1380,10 +1377,7 @@ test_that("str_count", {
   # nse_funcs$str_count() is not vectorised over pattern
   expect_dplyr_equal(
     input %>%
-      mutate(let_count = str_count(
-        cities,
-        pattern = c("a", "b", "e", "g", "p", "n", "s")
-      )) %>%
+      mutate(let_count = str_count(cities, pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>%
       collect(),
     df,
     warning = TRUE

From 606a623f8db356d844eaeb2c57a9b3d9b08514f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 21 Oct 2021 17:25:08 +0100
Subject: [PATCH 11/12] Update r/NEWS.md

remove NEWS.md bullet point as we're in the immediate pre-release period

Co-authored-by: Ian Cook <ianmcook@gmail.com>
---
 r/NEWS.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 43230899787..065a5d792df 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,8 +19,6 @@
 
 # arrow 5.0.0.9000
 
-* added bindings for `str_count()`. (ARROW-13156, @dragosmg)
-
 There are now two ways to query Arrow data:
 
 ## 1. Expanded Arrow-native queries: aggregation and joins

From 51060e8c0f458e96fecf75a096a0cefa378b99e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?=
 <dragos.mold@gmail.com>
Date: Thu, 21 Oct 2021 17:25:44 +0100
Subject: [PATCH 12/12] Update r/tests/testthat/test-dplyr-funcs-string.R

Co-authored-by: Ian Cook <ianmcook@gmail.com>
---
 r/tests/testthat/test-dplyr-funcs-string.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index f7dcc02cc25..333735be4f0 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -791,8 +791,7 @@ test_that("strftime", {
   )
 
   withr::with_timezone(
-    "Pacific/Marquesas",
-    {
+    "Pacific/Marquesas", {
       expect_dplyr_equal(
         input %>%
           mutate(