Skip to content
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ Authors@R: c(
person("Ofek","Shilon", role="ctb"),
person("Vadim","Khotilovich", role="ctb"),
person("Hadley","Wickham", role="ctb"),
person("Bennet","Becker", role="ctb"))
person("Bennet","Becker", role="ctb"),
person("Kyle","Haynes", role="ctb"))
Depends: R (>= 3.1.0)
Imports: methods
Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ exportClasses(data.table, IDate, ITime)
export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy)
export(setindex, setindexv, indices)
export(as.data.table,is.data.table,test.data.table)
export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%")
export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%")
export(timetaken)
export(truelength, setalloccol, alloc.col, ":=")
export(setattr, setnames, setcolorder, set, setDT, setDF)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@

14. `.datatable.aware` is now recognized in the calling environment in addition to the namespace of the calling package, [dtplyr#184](https://github.com/tidyverse/dtplyr/issues/184). Thanks to Hadley Wickham for the idea and PR.

15. New convenience function `%plike%` maps to `like(..., perl=TRUE)`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expressions (PCRE) which extend TRE, and may be more efficient in some cases. Thanks @KyleHaynes for the suggestion and PR.

## BUG FIXES

1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.
Expand Down
8 changes: 5 additions & 3 deletions R/like.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Intended for use with a data.table 'where'
# Don't use * or % wildcards as in SQL's LIKE. Uses regular expression (regex) syntax, which is more powerful.
# returns 'logical' so can be combined with other where clauses.
like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE) {
like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE, perl = FALSE) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's common enough to need another infix for it, but we might as well add useBytes as an argument too? Comes up occasionally when working with messy strings.

Or maybe even just change the signature to function(vector, pattern, ...) and pass it on, although grepl is more limited than grep...

if (is.factor(vector)) {
# indexing by factors is equivalent to indexing by the numeric codes, see ?`[` #4748
ret = grepl(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed)[vector]
ret = grepl(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed, perl = perl)[vector]
ret[is.na(ret)] = FALSE
ret
} else {
# most usually character, but integer and numerics will be silently coerced by grepl
grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed)
grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed, perl = perl)
}
}

Expand All @@ -19,3 +19,5 @@ like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE) {
# Fixed-pattern matching, analogous to `grep -F` / `fgrep`: the pattern is
# taken literally, not as a regex (more efficient where applicable).
"%flike%" = function(vector, pattern) {
  like(vector, pattern, fixed = TRUE)
}
# Perl-compatible regex matching: forwards to like() with perl = TRUE.
"%plike%" = function(vector, pattern) {
  like(vector, pattern, perl = TRUE)
}
9 changes: 5 additions & 4 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -7366,7 +7366,7 @@ test(1530.2, which.first(x), which(x)[1L])
test(1530.3, which.last(1:5), error = "x not boolean")
test(1530.4, which.last(x), tail(which(x), 1L))

# test for like, %like%, %ilike%, %flike%
# test for like, %like%, %ilike%, %flike%, %plike%
set.seed(2L)
x = apply(matrix(sample(letters, 12), nrow=2), 1, paste, collapse="")
y = factor(sample(c(letters[1:5], x), 20, TRUE))
Expand All @@ -7382,10 +7382,11 @@ test(1532.06, like(x, '()'), c(TRUE, TRUE, TRUE))
test(1532.07, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE))
test(1532.08, x %ilike% 'hey', c(TRUE, TRUE, FALSE))
test(1532.09, x %flike% '()', c(FALSE, FALSE, TRUE))
## %like% test for ordered factor with NA
x = c("A", "B", "C", NA_character_)
test(1532.10, like(x, "(?=h)(?=.*y)", perl = TRUE), c(FALSE, TRUE, FALSE))
test(1532.11, x %plike% "(?=h)(?=.*y)", c(FALSE, TRUE, FALSE)) #3702
x = c("A", "B", "C", NA_character_) # ordered factor with NA
x = ordered(x, levels = rev(x)[-1L])
test(1532.10, x %like% "A", c(TRUE, FALSE, FALSE, FALSE))
test(1532.12, x %like% "A", c(TRUE, FALSE, FALSE, FALSE))

# coverage for setkey() to 100%
dt1 = data.table(x=sample(5), y=1:5, key="y")
Expand Down
6 changes: 5 additions & 1 deletion man/like.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@
\alias{\%like\%}
\alias{\%ilike\%}
\alias{\%flike\%}
\alias{\%plike\%}
\title{ Convenience function for calling grep. }
\description{
Intended for use in \code{i} in \code{\link[=data.table]{[.data.table}}, i.e., for subsetting/filtering.

Syntax should be familiar to SQL users, with interpretation as regex.
}
\usage{
like(vector, pattern, ignore.case = FALSE, fixed = FALSE)
like(vector, pattern, ignore.case = FALSE, fixed = FALSE, perl = FALSE)
vector \%like\% pattern
vector \%ilike\% pattern
vector \%flike\% pattern
vector \%plike\% pattern
}
\arguments{
\item{vector}{ Either a \code{character} or a \code{factor} vector. }
\item{pattern}{ Pattern to be matched }
\item{ignore.case}{ \code{logical}; should case be ignored when matching \code{pattern}? }
\item{fixed}{ \code{logical}; should \code{pattern} be interpreted as a literal string (i.e., ignoring regular expressions)? }
\item{perl}{ \code{logical}; should \code{pattern} be interpreted as a Perl-compatible regular expression? }
}
\details{
Internally, \code{like} is essentially a wrapper around \code{\link[base:grep]{base::grepl}}, except that it is smarter about handling \code{factor} input (\code{base::grep} uses slow \code{as.character} conversion).
Expand All @@ -34,5 +37,6 @@ DT = data.table(Name=c("Mary","George","Martha"), Salary=c(2,3,4))
DT[Name \%like\% "^Mar"]
DT[Name \%ilike\% "mar"]
DT[Name \%flike\% "Mar"]
DT[Name \%plike\% "(?=Ma)(?=.*y)"]
}
\keyword{ data }