From b982c4220ef55bbb9760b152f294b933109573f7 Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle.haynes@treasury.qld.gov.au>
Date: Thu, 19 Dec 2019 07:51:21 +1000
Subject: [PATCH 01/10] added %plike%

---
 NAMESPACE             | 2 +-
 NEWS.md               | 1 +
 R/like.R              | 8 +++++---
 inst/tests/tests.Rraw | 6 +++++-
 man/like.Rd           | 6 +++++-
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 7689afe383..503cfef735 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,7 +8,7 @@ exportClasses(data.table, IDate, ITime)
 export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy)
 export(setindex, setindexv, indices)
 export(as.data.table,is.data.table,test.data.table)
-export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%")
+export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%")
 export(timetaken)
 export(truelength, setalloccol, alloc.col, ":=")
 export(setattr, setnames, setcolorder, set, setDT, setDF)
diff --git a/NEWS.md b/NEWS.md
index 8f17827eb1..7349b51d12 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -16,6 +16,7 @@
 
 5. `nafill` and `setnafill` gain `nan` argument to say whether `NaN` should be considered the same as `NA` for filling purposes, [#4020](https://github.com/Rdatatable/data.table/issues/4020). Prior versions had an implicit value of `nan=NaN`; the default is now `nan=NA`, i.e., `NaN` is treated as if it's missing. Thanks @AnonymousBoba for the suggestion. Also, while `nafill` still respects `getOption('datatable.verbose')`, the `verbose` argument has been removed.
 
+6. New convenience functions `%plike%` which map the existing `like()` argument `perl`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expression (PCRE) which extends on TRE and is more efficient. Thanks @KyleHaynes for the suggestion and PR.
 
 ## BUG FIXES
 
diff --git a/R/like.R b/R/like.R
index c66678c643..d17ffc129b 100644
--- a/R/like.R
+++ b/R/like.R
@@ -1,12 +1,12 @@
 # Intended for use with a data.table 'where'
 # Don't use * or % like SQL's like.  Uses regexpr syntax - more powerful.
 # returns 'logical' so can be combined with other where clauses.
-like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE) {
+like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE, perl = FALSE) {
   if (is.factor(vector)) {
-    as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed)
+    as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed, perl = perl)
   } else {
     # most usually character, but integer and numerics will be silently coerced by grepl
-    grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed)
+    grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed, perl = perl)
   }
 }
 
@@ -16,3 +16,5 @@ like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE) {
 # as grep -F or fgrep -- grep against a fixed pattern (no regex)
 #   (more efficient where applicable)
 "%flike%" = function(vector, pattern) like(vector, pattern, fixed = TRUE)
+# Perl-compatible regex
+"%plike%" = function(vector, pattern) like(vector, pattern, perl = TRUE)
\ No newline at end of file
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b1a6feaa69..05e46946ba 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7211,7 +7211,7 @@ test(1530.2, which.first(x), which(x)[1L])
 test(1530.3, which.last(1:5), error = "x not boolean")
 test(1530.4, which.last(x), tail(which(x), 1L))
 
-# test for like, %like%, %ilike%, %flike%
+# test for like, %like%, %ilike%, %flike%, %plike%
 set.seed(2L)
 x = apply(matrix(sample(letters, 12), nrow=2), 1, paste, collapse="")
 y = factor(sample(c(letters[1:5], x), 20, TRUE))
@@ -7227,6 +7227,10 @@ test(1532.6, like(x, '()'), c(TRUE, TRUE, TRUE))
 test(1532.7, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE))
 test(1532.8, x %ilike% 'hey', c(TRUE, TRUE, FALSE))
 test(1532.9, x %flike% '()', c(FALSE, FALSE, TRUE))
+## %plike% for #3702
+test(1532.10, like(x, "(?=h)(?=.*y)", perl = TRUE), c(FALSE, TRUE, FALSE))
+test(1532.11, x %plike% "(?=h)(?=.*y)", c(FALSE, TRUE, FALSE))
+
 
 # coverage for setkey() to 100%
 dt1 = data.table(x=sample(5), y=1:5, key="y")
diff --git a/man/like.Rd b/man/like.Rd
index 4eadb98a81..df5d757c9e 100644
--- a/man/like.Rd
+++ b/man/like.Rd
@@ -3,6 +3,7 @@
 \alias{\%like\%}
 \alias{\%ilike\%}
 \alias{\%flike\%}
+\alias{\%plike\%}
 \title{ Convenience function for calling grep. }
 \description{
   Intended for use in \code{i} in \code{\link[=data.table]{[.data.table}}, i.e., for subsetting/filtering.
@@ -10,16 +11,18 @@
   Syntax should be familiar to SQL users, with interpretation as regex.
 }
 \usage{
-like(vector, pattern, ignore.case = FALSE, fixed = FALSE)
+like(vector, pattern, ignore.case = FALSE, fixed = FALSE, perl = FALSE)
 vector \%like\% pattern
 vector \%ilike\% pattern
 vector \%flike\% pattern
+vector \%plike\% pattern
 }
 \arguments{
    \item{vector}{ Either a \code{character} or a \code{factor} vector. }
    \item{pattern}{ Pattern to be matched }
    \item{ignore.case}{ \code{logical}; is \code{pattern} case-sensitive? }
    \item{fixed}{ \code{logical}; should \code{pattern} be interpreted as a literal string (i.e., ignoring regular expressions)? }
+   \item{perl}{ \code{logical}; is \code{pattern} perl-compatible regular expression? }
 }
 \details{
   Internally, \code{like} is essentially a wrapper around \code{\link[base:grep]{base::grepl}}, except that it is smarter about handling \code{factor} input (\code{base::grep} uses slow \code{as.character} conversion).
@@ -34,5 +37,6 @@ DT = data.table(Name=c("Mary","George","Martha"), Salary=c(2,3,4))
 DT[Name \%like\% "^Mar"]
 DT[Name \%ilike\% "mar"]
 DT[Name \%flike\% "Mar"]
+DT[Name \%plike\% "(?=Ma)(?=.*y)"]
 }
 \keyword{ data }

From e1077edcc317de8bd3ad1b9273cfe50c61ec2c89 Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle@kylehaynes.com.au>
Date: Thu, 19 Dec 2019 07:54:06 +1000
Subject: [PATCH 02/10] fixed typo

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 7349b51d12..cad4d213a4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -16,7 +16,7 @@
 
 5. `nafill` and `setnafill` gain `nan` argument to say whether `NaN` should be considered the same as `NA` for filling purposes, [#4020](https://github.com/Rdatatable/data.table/issues/4020). Prior versions had an implicit value of `nan=NaN`; the default is now `nan=NA`, i.e., `NaN` is treated as if it's missing. Thanks @AnonymousBoba for the suggestion. Also, while `nafill` still respects `getOption('datatable.verbose')`, the `verbose` argument has been removed.
 
-6. New convenience functions `%plike%` which map the existing `like()` argument `perl`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expression (PCRE) which extends on TRE and is more efficient. Thanks @KyleHaynes for the suggestion and PR.
+6. New convenience function `%plike%` which map the existing `like()` argument `perl`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expression (PCRE) which extends on TRE and is more efficient. Thanks @KyleHaynes for the suggestion and PR.
 
 ## BUG FIXES
 

From 28f6b1e06d265c20ad72b5abc7e8914d37e7ea91 Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle@kylehaynes.com.au>
Date: Thu, 19 Dec 2019 07:57:06 +1000
Subject: [PATCH 03/10] fixed case

---
 man/like.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/like.Rd b/man/like.Rd
index df5d757c9e..81016d2843 100644
--- a/man/like.Rd
+++ b/man/like.Rd
@@ -22,7 +22,7 @@ vector \%plike\% pattern
    \item{pattern}{ Pattern to be matched }
    \item{ignore.case}{ \code{logical}; is \code{pattern} case-sensitive? }
    \item{fixed}{ \code{logical}; should \code{pattern} be interpreted as a literal string (i.e., ignoring regular expressions)? }
-   \item{perl}{ \code{logical}; is \code{pattern} perl-compatible regular expression? }
+   \item{perl}{ \code{logical}; is \code{pattern} a Perl-compatible regular expression? }
 }
 \details{
   Internally, \code{like} is essentially a wrapper around \code{\link[base:grep]{base::grepl}}, except that it is smarter about handling \code{factor} input (\code{base::grep} uses slow \code{as.character} conversion).

From 044dd0e3060efbbe018816f6ca2226669ee9dd7d Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle@kylehaynes.com.au>
Date: Thu, 19 Dec 2019 08:16:52 +1000
Subject: [PATCH 04/10] Fixed tests.Rraw

---
 inst/tests/tests.Rraw | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 05e46946ba..f28f8710b7 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7216,17 +7216,17 @@ set.seed(2L)
 x = apply(matrix(sample(letters, 12), nrow=2), 1, paste, collapse="")
 y = factor(sample(c(letters[1:5], x), 20, TRUE))
 xsub = substring(x, 1L, 1L)
-test(1532.1, y %like% xsub[1L], grepl(xsub[1L], y))
-test(1532.2, y %like% xsub[2L], grepl(xsub[2L], y))
-test(1532.3, like(y, xsub[1L]), grepl(xsub[1L], y))
-test(1532.4, like(y, xsub[2L]), grepl(xsub[2L], y))
+test(1532.01, y %like% xsub[1L], grepl(xsub[1L], y))
+test(1532.02, y %like% xsub[2L], grepl(xsub[2L], y))
+test(1532.03, like(y, xsub[1L]), grepl(xsub[1L], y))
+test(1532.04, like(y, xsub[2L]), grepl(xsub[2L], y))
 ## %ilike% and %flike% for #3333
 x = c('HEY', 'hey', '()')
-test(1532.5, like(x, 'hey', ignore.case = TRUE), c(TRUE, TRUE, FALSE))
-test(1532.6, like(x, '()'), c(TRUE, TRUE, TRUE))
-test(1532.7, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE))
-test(1532.8, x %ilike% 'hey', c(TRUE, TRUE, FALSE))
-test(1532.9, x %flike% '()', c(FALSE, FALSE, TRUE))
+test(1532.05, like(x, 'hey', ignore.case = TRUE), c(TRUE, TRUE, FALSE))
+test(1532.06, like(x, '()'), c(TRUE, TRUE, TRUE))
+test(1532.07, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE))
+test(1532.08, x %ilike% 'hey', c(TRUE, TRUE, FALSE))
+test(1532.09, x %flike% '()', c(FALSE, FALSE, TRUE))
 ## %plike% for #3702
 test(1532.10, like(x, "(?=h)(?=.*y)", perl = TRUE), c(FALSE, TRUE, FALSE))
 test(1532.11, x %plike% "(?=h)(?=.*y)", c(FALSE, TRUE, FALSE))

From d626d2fdbae62415417bdef19f25bf7401c038df Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle.haynes@treasury.qld.gov.au>
Date: Thu, 19 Dec 2019 11:18:12 +1000
Subject: [PATCH 05/10] changes made based on feedback from @MichaelChirico

---
 NEWS.md  | 2 +-
 R/like.R | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index cad4d213a4..874fc939ff 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -16,7 +16,7 @@
 
 5. `nafill` and `setnafill` gain `nan` argument to say whether `NaN` should be considered the same as `NA` for filling purposes, [#4020](https://github.com/Rdatatable/data.table/issues/4020). Prior versions had an implicit value of `nan=NaN`; the default is now `nan=NA`, i.e., `NaN` is treated as if it's missing. Thanks @AnonymousBoba for the suggestion. Also, while `nafill` still respects `getOption('datatable.verbose')`, the `verbose` argument has been removed.
 
-6. New convenience function `%plike%` which map the existing `like()` argument `perl`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expression (PCRE) which extends on TRE and is more efficient. Thanks @KyleHaynes for the suggestion and PR.
+6. New convenience function `%plike%` maps to the existing `like()` argument `perl`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expression (PCRE) which extends on TRE and is computationally more efficient. Thanks @KyleHaynes for the suggestion and PR.
 
 ## BUG FIXES
 
diff --git a/R/like.R b/R/like.R
index d17ffc129b..17f3f758f3 100644
--- a/R/like.R
+++ b/R/like.R
@@ -1,12 +1,12 @@
 # Intended for use with a data.table 'where'
 # Don't use * or % like SQL's like.  Uses regexpr syntax - more powerful.
 # returns 'logical' so can be combined with other where clauses.
-like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE, perl = FALSE) {
+like = function(vector, pattern, ...) {
   if (is.factor(vector)) {
-    as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed, perl = perl)
+    as.integer(vector) %in% grep(pattern, levels(vector), ...)
   } else {
     # most usually character, but integer and numerics will be silently coerced by grepl
-    grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed, perl = perl)
+    grepl(pattern, vector, ignore.case = ...)
   }
 }
 

From 66029fb500d72d72b60bd69199e7df8ace94dbd5 Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle@kylehaynes.com.au>
Date: Tue, 19 May 2020 08:34:51 +1000
Subject: [PATCH 06/10] Fixed code error in `like`

---
 R/like.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/like.R b/R/like.R
index 17f3f758f3..a14d19bdf2 100644
--- a/R/like.R
+++ b/R/like.R
@@ -6,7 +6,7 @@ like = function(vector, pattern, ...) {
     as.integer(vector) %in% grep(pattern, levels(vector), ...)
   } else {
     # most usually character, but integer and numerics will be silently coerced by grepl
-    grepl(pattern, vector, ignore.case = ...)
+    grepl(pattern, vector, ...)
   }
 }
 
@@ -17,4 +17,4 @@ like = function(vector, pattern, ...) {
 #   (more efficient where applicable)
 "%flike%" = function(vector, pattern) like(vector, pattern, fixed = TRUE)
 # Perl-compatible regex
-"%plike%" = function(vector, pattern) like(vector, pattern, perl = TRUE)
\ No newline at end of file
+"%plike%" = function(vector, pattern) like(vector, pattern, perl = TRUE)

From 2099862fec9937df8f77760fcadd2cdd2e4b046c Mon Sep 17 00:00:00 2001
From: Kyle Haynes <kyle.haynes@treasury.qld.gov.au>
Date: Tue, 19 May 2020 11:32:47 +1000
Subject: [PATCH 07/10] reverted back to explicit arguments for like.

---
 R/like.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/like.R b/R/like.R
index a14d19bdf2..822ee2fb9b 100644
--- a/R/like.R
+++ b/R/like.R
@@ -1,12 +1,12 @@
 # Intended for use with a data.table 'where'
 # Don't use * or % like SQL's like.  Uses regexpr syntax - more powerful.
 # returns 'logical' so can be combined with other where clauses.
-like = function(vector, pattern, ...) {
+like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE, perl = FALSE) {
   if (is.factor(vector)) {
-    as.integer(vector) %in% grep(pattern, levels(vector), ...)
+    as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed, perl = perl)
   } else {
     # most usually character, but integer and numerics will be silently coerced by grepl
-    grepl(pattern, vector, ...)
+    grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed, perl = perl)
   }
 }
 

From c50a6ef003c19b4d978d17630c68443a9f16dfcc Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 4 Aug 2021 16:05:00 -0600
Subject: [PATCH 08/10] more guarded efficiency wording

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 8a8d658623..f944a2ffb8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -95,7 +95,7 @@
 
 14. `.datatable.aware` is now recognized in the calling environment in addition to the namespace of the calling package, [dtplyr#184](https://github.com/tidyverse/dtplyr/issues/184). Thanks to Hadley Wickham for the idea and PR.
 
-15. New convenience function `%plike%` maps to the existing `like()` argument `perl`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expression (PCRE) which extends on TRE and is computationally more efficient. Thanks @KyleHaynes for the suggestion and PR.
+15. New convenience function `%plike%` maps to `like(..., perl=TRUE)`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expressions (PCRE) which extend TRE, and may be more efficient in some cases. Thanks @KyleHaynes for the suggestion and PR.
 
 ## BUG FIXES
 

From 67a9cb9aa75777a477682e685d885dabff4fdacf Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 4 Aug 2021 16:12:44 -0600
Subject: [PATCH 09/10] Add Kyle to contributor list in DESCRIPTION

---
 DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 523a40f041..ff8fe0ebf6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -66,7 +66,8 @@ Authors@R: c(
   person("Ofek","Shilon",          role="ctb"),
   person("Vadim","Khotilovich",    role="ctb"),
   person("Hadley","Wickham",       role="ctb"),
-  person("Bennet","Becker",        role="ctb"))
+  person("Bennet","Becker",        role="ctb"),
+  person("Kyle","Haynes",          role="ctb"))
 Depends: R (>= 3.1.0)
 Imports: methods
 Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown

From c476ec75be92aa9c351d04430094efac03d1f1e8 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 4 Aug 2021 16:36:47 -0600
Subject: [PATCH 10/10] restore x for the test

---
 inst/tests/tests.Rraw | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 0cba60e304..49dd28509f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7382,15 +7382,11 @@ test(1532.06, like(x, '()'), c(TRUE, TRUE, TRUE))
 test(1532.07, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE))
 test(1532.08, x %ilike% 'hey', c(TRUE, TRUE, FALSE))
 test(1532.09, x %flike% '()', c(FALSE, FALSE, TRUE))
-
-## %like% test for ordered factor with NA
-x = c("A", "B", "C", NA_character_)
+test(1532.10, like(x, "(?=h)(?=.*y)", perl = TRUE), c(FALSE, TRUE, FALSE))
+test(1532.11, x %plike% "(?=h)(?=.*y)", c(FALSE, TRUE, FALSE)) #3702
+x = c("A", "B", "C", NA_character_)  # ordered factor with NA
 x = ordered(x, levels = rev(x)[-1L])
-test(1532.10, x %like% "A", c(TRUE, FALSE, FALSE, FALSE))
-
-## %plike% for #3702
-test(1532.11, like(x, "(?=h)(?=.*y)", perl = TRUE), c(FALSE, TRUE, FALSE))
-test(1532.12, x %plike% "(?=h)(?=.*y)", c(FALSE, TRUE, FALSE))
+test(1532.12, x %like% "A", c(TRUE, FALSE, FALSE, FALSE))
 
 # coverage for setkey() to 100%
 dt1 = data.table(x=sample(5), y=1:5, key="y")