diff --git a/NEWS.md b/NEWS.md
index bc6b599c35..dbef189c0f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -228,6 +228,8 @@
 
 27. `integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report.
 
+28. From v1.12.0 `between()` and `%between%` interpret missing values in `lower=` or `upper=` as unlimited bounds. A new parameter `NAbounds` has been added to achieve the old behaviour of returning `NA`, [#3522](https://github.com/Rdatatable/data.table/issues/3522). Thanks @cguill95 for reporting. This is now consistent for character input, [#3667](https://github.com/Rdatatable/data.table/issues/3667) (thanks @AnonymousBoba), and class `nanotime` is now supported too.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/R/between.R b/R/between.R
index 97498a97a2..b9588495cb 100644
--- a/R/between.R
+++ b/R/between.R
@@ -1,27 +1,18 @@
 # is x[i] in between lower[i] and upper[i] ?
-between = function(x,lower,upper,incbounds=TRUE) {
+between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE) {
   if (is.logical(x)) stop("between has been x of type logical")
   if (is.logical(lower)) lower = as.integer(lower)   # typically NA (which is logical type)
   if (is.logical(upper)) upper = as.integer(upper)   # typically NA (which is logical type)
   is.px = function(x) inherits(x, "POSIXct")
-  is.i64 = function(x) inherits(x, "integer64")
+  is.i64 = function(x) inherits(x, "integer64")   # this is true for nanotime too
   # POSIX special handling to auto coerce character
-  if (is.px(x) && !is.null(tz<-attr(x, "tzone", exact=TRUE)) && nzchar(tz) &&
-      (is.character(lower) || is.character(upper))) {
-    try_posix_cast = function(x, tz) {tryCatch(
-      list(status=0L, value=as.POSIXct(x, tz = tz)),
-      error = function(e) list(status=1L, value=NULL, message=e[["message"]])
-    )}
-    if (is.character(lower)) {
-      ans = try_posix_cast(lower, tz)
-      if (ans$status==0L) lower = ans$value
-      else stop("'between' function the 'x' argument is a POSIX class while 'lower' was not, coercion to POSIX failed with: ", ans$message)
-    }
-    if (is.character(upper)) {
-      ans = try_posix_cast(upper, tz)
-      if (ans$status==0L) upper = ans$value
-      else stop("'between' function the 'x' argument is a POSIX class while 'upper' was not, coercion to POSIX failed with: ", ans$message)
-    }
+  if (is.px(x) && (is.character(lower) || is.character(upper))) {
+    tz = attr(x, "tzone", exact=TRUE)
+    if (is.null(tz)) tz = ""
+    if (is.character(lower)) lower = tryCatch(as.POSIXct(lower, tz=tz), error=function(e)stop(
+      "'between' function the 'x' argument is a POSIX class while 'lower' was not, coercion to POSIX failed with: ", e$message))
+    if (is.character(upper)) upper = tryCatch(as.POSIXct(upper, tz=tz), error=function(e)stop(
+      "'between' function the 'x' argument is a POSIX class while 'upper' was not, coercion to POSIX failed with: ", e$message))
     stopifnot(is.px(x), is.px(lower), is.px(upper)) # nocov # internal
   }
   # POSIX check timezone match
@@ -34,23 +25,20 @@ between = function(x,lower,upper,incbounds=TRUE) {
       stop("'between' function arguments have mismatched timezone attribute, align all arguments to same timezone")
     }
   }
-  # int64
   if (is.i64(x)) {
     if (!requireNamespace("bit64", quietly=TRUE)) stop("trying to use integer64 class when 'bit64' package is not installed") # nocov
     if (!is.i64(lower) && is.numeric(lower)) lower = bit64::as.integer64(lower)
     if (!is.i64(upper) && is.numeric(upper)) upper = bit64::as.integer64(upper)
-  } else if (is.i64(lower) || is.i64(upper)) {
-    stop("'lower' and/or 'upper' are integer64 class while 'x' argument is not, align classes before passing to 'between'")
   }
-  is.supported = function(x) is.numeric(x) || is.px(x)
+  is.supported = function(x) is.numeric(x) || is.character(x) || is.px(x)
   if (is.supported(x) && is.supported(lower) && is.supported(upper)) {
-    # faster parallelised version for int/double.
+    # faster parallelised version for int/double/character
     # Cbetween supports length(lower)==1 (recycled) and (from v1.12.0) length(lower)==length(x).
     # length(upper) can be 1 or length(x) independently of lower
-    .Call(Cbetween, x, lower, upper, incbounds)
+    .Call(Cbetween, x, lower, upper, incbounds, NAbounds)
   } else {
     if (isTRUE(getOption("datatable.verbose"))) cat("optimised between not available for this data type, fallback to slow R routine\n")
-    # now just for character input. TODO: support character between in Cbetween and remove this branch
+    if (isTRUE(NAbounds) && (anyNA(lower) || anyNA(upper))) stop("Not yet implemented NAbounds=TRUE for this non-numeric and non-character type")
     if (incbounds) x>=lower & x<=upper
     else x>lower & x<upper
   }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 58c3c63c4a..964e9181aa 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7159,13 +7159,21 @@ test(1529.04, between(x, 1L, 5L, TRUE), x >= 1L & x <= 5L)
 test(1529.05, x %between% c(1L, 5L), x >= 1L & x <= 5L)
 test(1529.06, between(x, 1L, 5L, FALSE), x > 1L & x < 5L)
 x = runif(15)
-test(1529.07, between(x, 0.25, 0.75, TRUE), x >= 0.25 & x <= 0.75)
+test(1529.07, between(x, 0.25, 0.75, incbounds=TRUE), x >= 0.25 & x <= 0.75)
 test(1529.08, x %between% c(0.25, 0.75), x >= 0.25 & x <= 0.75)
-test(1529.09, between(x, 0.25, 0.75, FALSE), x > 0.25 & x < 0.75)
+test(1529.09, between(x, 0.25, 0.75, incbounds=FALSE), x > 0.25 & x < 0.75)
+test(1529.10, between(x, 0.25, NA,   NAbounds=NA),     ifelse(x<=0.25, FALSE, NA))
+test(1529.11, between(x, NA, 0.75,   NAbounds=NA),     ifelse(x>=0.75, FALSE, NA))
+test(1529.12, between(x, NA, NA,     NAbounds=NA),     rep(NA, length(x)))
+test(1529.13, between(x, NA, NA,     NAbounds=TRUE),   rep(TRUE, length(x)))
+test(1529.14, between(x, x[3], NA, incbounds=FALSE, NAbounds=NA), ifelse(x<=x[3], FALSE, NA))
+test(1529.15, between(x, x[3], NA, incbounds=TRUE, NAbounds=NA),  ifelse(x<x[3], FALSE, NA))
+test(1529.16, between(x, NA, x[9], incbounds=FALSE, NAbounds=NA), ifelse(x>=x[9], FALSE, NA))
+test(1529.17, between(x, NA, x[9], incbounds=TRUE, NAbounds=NA),  ifelse(x>x[9], FALSE, NA))
 x = c(NA, runif(15), NA)
-test(1529.10, between(x, 0.25, 0.75, TRUE), x >= 0.25 & x <= 0.75)
-test(1529.11, x %between% c(0.25, 0.75), x >= 0.25 & x <= 0.75)
-test(1529.12, between(x, 0.25, 0.75, FALSE), x > 0.25 & x < 0.75)
+test(1529.18, between(x, 0.25, 0.75, incbounds=TRUE), x >= 0.25 & x <= 0.75)
+test(1529.19, x %between% c(0.25, 0.75), x >= 0.25 & x <= 0.75)
+test(1529.20, between(x, 0.25, 0.75, incbounds=FALSE), x > 0.25 & x < 0.75)
 
 # add tests for which.first and which.last
 # which.first
@@ -9441,7 +9449,7 @@ test(1656, nrow(dt[dt, .(x.id, x.a, x.b, x.c, x.row_id, i.id, i.a, i.b, i.c, i.r
 
 # between is vectorised, #534
 set.seed(1L)
-dt = data.table(x=sample(3,10,TRUE), y=sample(2,10,TRUE), z=sample(5,10,TRUE))
+dt = data.table(x=sample(3,10,TRUE), y=sample(2,10,TRUE), z=1L+sample(2,10,TRUE))
 test(1657, dt[x %between% list(y,z)], dt[x>=y & x<=z])
 
 # fwrite tests
@@ -14657,34 +14665,33 @@ test(2037.2, names(DT), 'a')
 test(2037.3, unname(table(unlist(strsplit(capture.output(foo(DT)), '\n|\\s+')))['ptr']), 1L)
 
 # `between` invalid args, and verbose #3516
-test(2038.01, between(1:5, 2, 4, incbounds=423), error="incbounds must be logical")
-test(2038.02, between(1:5, 2, 4, incbounds=NA), error="incbounds must be logical")
+test(2038.01, between(1:5, 2, 4, incbounds=423), error="incbounds must be TRUE or FALSE")
+test(2038.02, between(1:5, 2, 4, incbounds=NA), error="incbounds must be TRUE or FALSE")
 old = options(datatable.verbose=TRUE)
-test(2038.03, between(1:5, 2L, 4L), output="between parallel processing of integer with recycling took")
+test(2038.03, between(1:5, 2L, 4L), output="between parallel processing of integer took")
 test(2038.04, between(1:5, rep(2L,5L), rep(4L, 5L)), output="between parallel processing of integer took")
-test(2038.05, between(as.double(1:5), 2, 4, incbounds=FALSE), output="between parallel processing of double using open bounds with recycling took")
-test(2038.06, between(as.double(1:5), 2, 4), output="between parallel processing of double using closed bounds with recycling took")
-test(2038.07, between(as.double(1:5), rep(2, 5L), rep(4, 5L)), output="between parallel processing of double took")
-test(2038.08, between(c("foo","bar","paz"), "bag", "fog"), output="optimised between not available for this data type, fallback to slow R routine")
+test(2038.05, between(as.double(1:5), 2, 4, incbounds=FALSE), output="between parallel processing of double with open bounds took")
+test(2038.06, between(as.double(1:5), 2, 4), output="between parallel processing of double with closed bounds took")
+test(2038.07, between(as.double(1:5), rep(2, 5L), rep(4, 5L)), output="between parallel processing of double with closed bounds took")
+test(2038.08, between(c("foo","bar","paz"), "bag", "fog"), output="between non-parallel processing of character took")
 
 # `between` handle POSIXct type
 x = as.POSIXct("2016-09-18 07:00:00") + 0:10*60*15
 dn = as.POSIXct('2016-09-18 08:00:00')
 up = as.POSIXct('2016-09-18 09:00:00')
-test(2038.09, between(x, dn, up), output="between parallel processing of double using closed bounds with recycling took")
-test(2038.10, between(x, dn, up, incbounds=FALSE), output="between parallel processing of double using open bounds with recycling took")
+test(2038.09, between(x, dn, up), output="between parallel processing of double with closed bounds took")
+test(2038.10, between(x, dn, up, incbounds=FALSE), output="between parallel processing of double with open bounds took")
 
-# also handling of string lower/upper bounds _only when x has a time zone_
-x = as.POSIXct("2016-09-18 07:00:00") + 0:10*60*15
+# also handling of string lower/upper bounds
+x = as.POSIXct("2016-09-18 07:00:00") + 0:10*60*15  # local time zone
 dn = '2016-09-18 08:00:00'
 up = '2016-09-18 09:00:00'
-test(2038.11, between(x, dn, up), output = 'optimised between not available')
-attr(x, 'tzone') = 'UTC'
-test(2038.12, between(x, dn, up), output = 'between parallel processing of double')
+test(2038.11, between(x, dn, up), ans<-as.logical(c(0,0,0,0,1,1,1,1,1,0,0)), output='between parallel processing of double with closed bounds')
+attr(x, 'tzone') = 'UTC' # this conversion will result in different UTC times depending on which local time zone the test runs in
+test(2038.12, between(x, x[5], x[9]), ans, output='between parallel processing of double with closed bounds')
 
 # additional flexibility -- cast when one bound is already POSIXct
-up = as.POSIXct(up, tz="UTC")
-test(2038.13, between(x, dn, up), output = 'between parallel processing of double')
+test(2038.13, between(x, as.character(x[5]), x[9]), ans, output='between parallel processing of double')
 options(old)
 
 # exceptions in char to POSIX coercion
@@ -14706,45 +14713,46 @@ test(2038.17, X[c %between% list(a, b)], X[c %between% .(a, b)])
 
 # between num to int coercion #3517
 old = options("datatable.verbose"=TRUE)
-test(2038.18, between(1:5, 2, 4), output="between parallel processing of integer with recycling")
-test(2038.19, between(1:5, 2L, 4), output="between parallel processing of integer with recycling")
-test(2038.20, between(1:5, 2, 4L), output="between parallel processing of integer with recycling")
+test(2038.18, between(1:5, 2, 4), output="between parallel processing of integer")
+test(2038.19, between(1:5, 2L, 4), output="between parallel processing of integer")
+test(2038.20, between(1:5, 2, 4L), output="between parallel processing of integer")
 # revdep regression #3565
-test(2038.21, between(1:10, -Inf, Inf),            rep(TRUE,10), output="between parallel processing of double using closed bounds with recycling")
-test(2038.22, between(as.double(1:10), -Inf, Inf), rep(TRUE,10), output="between parallel processing of double using closed bounds with recycling")
+test(2038.21, between(1:10, -Inf, Inf),            rep(TRUE,10), output="between parallel processing of double with closed bounds")
+test(2038.22, between(as.double(1:10), -Inf, Inf), rep(TRUE,10), output="between parallel processing of double with closed bounds")
 options(old)
 
 # between int64 support
 if (test_bit64) {
   as.i64 = bit64::as.integer64
-  test(2039.01, between(1:10, as.i64(3), as.i64(6)), error="are integer64 class while 'x' argument is not")
-  test(2039.02, between(1:10, 3, as.i64(6)), error="are integer64 class while 'x' argument is not")
+  test(2039.01, between(1:10, as.i64(3), as.i64(6)),  error="x is not integer64 but.*Please align classes")
+  test(2039.02, between(1:10, 3, as.i64(6)),          error="x is not integer64 but.*Please align classes")
+  test(2039.03, between(as.i64(1:3), "2", as.i64(4)), error="x is integer64 but lower and/or upper are not")
   old = options("datatable.verbose"=TRUE)
   x = as.i64(1:10)
   ans36 = c(FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE)
   ans36open = c(FALSE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE)
-  test(2039.03, between(x, 3, 6), ans36, output="between parallel processing of integer64 with recycling")
-  test(2039.04, between(x, 3, 6, incbounds=FALSE), ans36open, output="between parallel processing of integer64 with recycling")
-  test(2039.05, between(x, 3L, 6), ans36, output="between parallel processing of integer64 with recycling")
-  test(2039.06, between(x, 3L, 6, incbounds=FALSE), ans36open, output="between parallel processing of integer64 with recycling")
-  test(2039.07, between(x, 3, 6L), ans36, output="between parallel processing of integer64 with recycling")
-  test(2039.08, between(x, 3, 6L, incbounds=FALSE), ans36open, output="between parallel processing of integer64 with recycling")
-  test(2039.09, between(x, 3L, 6L), ans36, output="between parallel processing of integer64 with recycling")
-  test(2039.10, between(x, 3L, 6L, incbounds=FALSE), ans36open, output="between parallel processing of integer64 with recycling")
-  test(2039.11, between(x, rep(3, 10L), rep(6, 10L)), ans36, output="between parallel processing of integer64 took")
-  test(2039.12, between(x, rep(3, 10L), rep(6, 10L), incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
+  test(2039.04, between(x, 3, 6), ans36, output="between parallel processing of integer64 took")
+  test(2039.05, between(x, 3, 6, incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
+  test(2039.06, between(x, 3L, 6), ans36, output="between parallel processing of integer64 took")
+  test(2039.07, between(x, 3L, 6, incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
+  test(2039.08, between(x, 3, 6L), ans36, output="between parallel processing of integer64 took")
+  test(2039.09, between(x, 3, 6L, incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
+  test(2039.10, between(x, 3L, 6L), ans36, output="between parallel processing of integer64 took")
+  test(2039.11, between(x, 3L, 6L, incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
+  test(2039.12, between(x, rep(3, 10L), rep(6, 10L)), ans36, output="between parallel processing of integer64 took")
+  test(2039.13, between(x, rep(3, 10L), rep(6, 10L), incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
   maxint = 2147483647
-  test(2039.13, between(x+maxint, 3+maxint, 6+maxint), ans36, output="between parallel processing of integer64 with recycling")
-  test(2039.14, between(x+maxint, 3+maxint, 6+maxint, incbounds=FALSE), ans36open, output="between parallel processing of integer64 with recycling")
+  test(2039.14, between(x+maxint, 3+maxint, 6+maxint), ans36, output="between parallel processing of integer64 took")
+  test(2039.15, between(x+maxint, 3+maxint, 6+maxint, incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
   x[5] = NA
   ans36[5] = NA
   ans36open[5] = NA
-  test(2039.15, between(x+maxint, 3+maxint, 6+maxint), ans36, output="between parallel processing of integer64 with recycling")
-  test(2039.16, between(x+maxint, 3+maxint, 6+maxint, incbounds=FALSE), ans36open, output="between parallel processing of integer64 with recycling")
-  test(2039.17, between(x+maxint, NA, 6+maxint), c(TRUE, TRUE, tail(ans36, -2L)), output="between parallel processing of integer64 with recycling")
-  test(2039.18, between(x+maxint, 3+maxint, NA, incbounds=FALSE), c(head(ans36open, -5L), rep(TRUE, 5)), output="between parallel processing of integer64 with recycling")
-  test(2039.19, between(x+maxint, rep(NA, 10L), rep(6+maxint, 10L)), c(TRUE, TRUE, tail(ans36, -2L)), output="between parallel processing of integer64 took")
-  test(2039.20, between(x+maxint, rep(3+maxint, 10L), rep(NA, 10L), incbounds=FALSE), c(head(ans36open, -5L), rep(TRUE, 5)), output="between parallel processing of integer64 took")
+  test(2039.16, between(x+maxint, 3+maxint, 6+maxint), ans36, output="between parallel processing of integer64 took")
+  test(2039.17, between(x+maxint, 3+maxint, 6+maxint, incbounds=FALSE), ans36open, output="between parallel processing of integer64 took")
+  test(2039.18, between(x+maxint, NA, 6+maxint), c(TRUE, TRUE, tail(ans36, -2L)), output="between parallel processing of integer64 took")
+  test(2039.19, between(x+maxint, 3+maxint, NA, incbounds=FALSE), c(head(ans36open, -5L), rep(TRUE, 5)), output="between parallel processing of integer64 took")
+  test(2039.20, between(x+maxint, rep(NA, 10L), rep(6+maxint, 10L)), c(TRUE, TRUE, tail(ans36, -2L)), output="between parallel processing of integer64 took")
+  test(2039.21, between(x+maxint, rep(3+maxint, 10L), rep(NA, 10L), incbounds=FALSE), c(head(ans36open, -5L), rep(TRUE, 5)), output="between parallel processing of integer64 took")
   options(old)
 }
 
@@ -15671,6 +15679,57 @@ if (test_bit64) {
   test(2077.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g]))
 }
 
+# between NAs in character bounds #3667
+test(2078.01, letters %between% c("m", NA_character_), c(rep(FALSE, 12L), rep(TRUE, 14L)))
+test(2078.02, between(letters, "m", NA_character_, incbounds=TRUE), c(rep(FALSE, 12L), rep(TRUE, 14L)))
+test(2078.03, between(letters, "m", NA_character_, incbounds=FALSE), c(rep(FALSE, 13L), rep(TRUE, 13L)))
+test(2078.04, between(letters, NA_character_, "m", incbounds=TRUE), c(rep(TRUE, 13L), rep(FALSE, 13L)))
+test(2078.05, between(letters, NA_character_, "m", incbounds=FALSE), c(rep(TRUE, 12L), rep(FALSE, 14L)))
+test(2078.06, between(letters, NA_character_, NA_character_, incbounds=TRUE), rep(TRUE, 26L))
+test(2078.07, between(letters, NA_character_, NA_character_, incbounds=FALSE), rep(TRUE, 26L))
+test(2078.11, between(c("a","b","c"), c("a","b"), "d"), error="Incompatible vector lengths.*3.*2.*1")
+test(2078.12, between(c("a","b","c"), "a", c("b","d")), error="Incompatible vector lengths.*3.*2.*1")
+test(2078.13, between(c("a","b","c"), c("a","b"), c("b","d")), error="Incompatible vector lengths.*3.*2.*2")
+test(2078.21, between(c("a","c","e"), c("b",NA,"d"), "e", incbounds=TRUE), c(FALSE, TRUE, TRUE))
+test(2078.22, between(c("a","c","e"), c("b",NA,"d"), "e", incbounds=FALSE), c(FALSE, TRUE, FALSE))
+test(2078.23, between(c("a","c","e"), c("b","c","d"), NA, incbounds=TRUE), c(FALSE, TRUE, TRUE))
+test(2078.24, between(c("a","c","e"), c("b","c","d"), NA, incbounds=FALSE), c(FALSE, FALSE, TRUE))
+test(2078.25, between(c("a","c","e"), c("b",NA,"d"), NA, incbounds=TRUE), c(FALSE, TRUE, TRUE))
+test(2078.26, between(c("a","c","e"), c("b",NA,"d"), NA, incbounds=FALSE), c(FALSE, TRUE, TRUE))
+test(2078.27, between(c("a","c","e"), "a", c("b",NA,"d"), incbounds=TRUE), c(TRUE, TRUE, FALSE))
+test(2078.28, between(c("a","c","e"), "a", c("b",NA,"d"), incbounds=FALSE), c(FALSE, TRUE, FALSE))
+test(2078.29, between(c("a","c","e"), NA, c("b",NA,"e"), incbounds=TRUE), c(TRUE, TRUE, TRUE))
+test(2078.30, between(c("a","c","e"), NA, c("b",NA,"e"), incbounds=FALSE), c(TRUE, TRUE, FALSE))
+test(2078.31, between(c("a","c","e"), NA, c("b",NA,"e"), incbounds=TRUE,  NAbounds=NA), c(NA, NA, NA))
+test(2078.32, between(c("a","c","e"), NA, c("b",NA,"e"), incbounds=FALSE, NAbounds=NA), c(NA, NA, FALSE))
+# between NA bound as unknown rather than unlimited, #3522
+test(2079.01, between(1:5, 3L, NA, incbounds=TRUE, NAbounds=NA), c(FALSE, FALSE, NA, NA, NA))
+test(2079.02, between(1:5, 3L, NA, incbounds=FALSE, NAbounds=TRUE), c(FALSE, FALSE, FALSE, TRUE, TRUE))
+test(2079.03, between(1:5, 3L, NA, incbounds=FALSE, NAbounds=FALSE), error="NAbounds must be TRUE or NA")
+# nanotime support
+if (test_nanotime) {
+  n=nanotime(1:4)
+  n[2L]=NA
+  op = options(datatable.verbose=TRUE)
+  test(2080.01, between(n, nanotime(2), nanotime(10)), c(FALSE, NA, TRUE, TRUE), output="between parallel processing of integer64")
+  test(2080.02, between(n, nanotime(3), nanotime(10), incbounds=FALSE), c(FALSE, NA, FALSE, TRUE), output="between parallel processing of integer64")
+  test(2080.03, between(n, nanotime(3), nanotime(NA), incbounds=FALSE, NAbounds=NA), c(FALSE, NA, FALSE, NA), output="between parallel processing of integer64")
+  options(op)
+  test(2080.04, between(1:10, nanotime(3), nanotime(6)), error="x is not integer64 but.*Please align classes")
+  test(2080.05, between(1:10, 3, nanotime(6)),           error="x is not integer64 but.*Please align classes")
+}
+# use raw type to cover fallback to R in between.R
+old = options(datatable.verbose=TRUE)
+test(2081.01, between(as.raw(1:5), as.raw(2), as.raw(4)),                  c(FALSE, TRUE, TRUE, TRUE, FALSE),   output="fallback to slow R")
+test(2081.02, between(as.raw(1:5), as.raw(2), as.raw(4), incbounds=FALSE), c(FALSE, FALSE, TRUE, FALSE, FALSE), output="fallback to slow R")
+options(old)
+test(2081.03, between(3i, NA, NA), error="Not yet implemented NAbounds=TRUE for this non-numeric and non-character type")
+# check that lower<=upper
+test(2082.01, between(1:3, INT(2,3,4), 2L), error="Item 2 of lower (3) is greater than item 1 of upper (2)")
+test(2082.02, between(as.double(1:3), c(2,3,4), 2), error="Item 2 of lower [(]3.*[)] is greater than item 1 of upper [(]2.*[)]")
+if (test_bit64) test(2082.03, between(as.integer64(1:3), as.integer64(2), as.integer64(1)), error="Item 1 of lower (2) is greater than item 1 of upper (1)")
+test(2082.04, between(letters[1:2], c("foo","bar"), c("bar")), error="Item 1 of lower ('foo') is greater than item 1 of upper ('bar')")
+
 
 ###################################
 #  Add new tests above this line  #
diff --git a/man/between.Rd b/man/between.Rd
index f740d0983a..f397b34d4d 100644
--- a/man/between.Rd
+++ b/man/between.Rd
@@ -7,15 +7,17 @@
 \description{
 Intended for use in \code{i} in \code{[.data.table}.
 
-\code{between} is equivalent to \code{x >= lower & x <= upper} when
-\code{incbounds=TRUE}, or \code{x > lower & y < upper} when \code{FALSE}. With a caveat that
-\code{NA} in \code{lower} or \code{upper} are taken as a missing bound and return \code{TRUE} not \code{NA}.
+\code{between} is equivalent to \code{lower<=x & x<=upper} when
+\code{incbounds=TRUE}, or \code{lower<x & y<upper} when \code{FALSE}. With a caveat that
+\code{NA} in \code{lower} or \code{upper} are taken as unlimited bounds not \code{NA}.
+This can be changed by setting \code{NAbounds} to \code{NA}. Further, as a convenient
+safety feature for robustness, an error is issued if any \code{lower>upper}.
 
 \code{inrange} checks whether each value in \code{x} is in between any of
 the intervals provided in \code{lower,upper}.
 }
 \usage{
-between(x, lower, upper, incbounds=TRUE)
+between(x, lower, upper, incbounds=TRUE, NAbounds=TRUE)
 x \%between\% y
 
 inrange(x, lower, upper, incbounds=TRUE)
@@ -32,6 +34,7 @@ interpreted as \code{lower} and \code{y[[2]]} as \code{upper}.}
 \item{incbounds}{ \code{TRUE} means inclusive bounds, i.e., [lower,upper].
 \code{FALSE} means exclusive bounds, i.e., (lower,upper).
 It is set to \code{TRUE} by default for infix notations.}
+\item{NAbounds}{ If \code{lower} (\code{upper}) contains an \code{NA} what should \code{lower<=x} (\code{x<=upper}) return? By default \code{TRUE} so that a missing bound is interpreted as unlimited. }
 }
 \details{
 
diff --git a/src/between.c b/src/between.c
index 8642389a13..344a9c75af 100644
--- a/src/between.c
+++ b/src/between.c
@@ -1,9 +1,8 @@
 #include "data.table.h"
 
-SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {
-
+SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAboundsArg) {
+  int nprotect = 0;
   R_len_t nx = length(x), nl = length(lower), nu = length(upper);
-  double tic=0.0;
   if (!nx || !nl || !nu)
     return (allocVector(LGLSXP, 0));
   const int longest = MAX(MAX(nx, nl), nu);
@@ -12,13 +11,15 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {
       (nx!=1 && nx!=longest)) {
     error("Incompatible vector lengths: length(x)==%d length(lower)==%d length(upper)==%d. Each should be either length 1 or the length of the longest.", nx, nl, nu);
   }
-  if (!isLogical(bounds) || LOGICAL(bounds)[0] == NA_LOGICAL)
-    error("incbounds must be logical TRUE/FALSE.");
+  const int longestBound = MAX(nl, nu);
+  if (!isLogical(incbounds) || LOGICAL(incbounds)[0]==NA_LOGICAL)
+    error("incbounds must be TRUE or FALSE");
+  const bool open = !LOGICAL(incbounds)[0];
+  if (!isLogical(NAboundsArg) || LOGICAL(NAboundsArg)[0]==FALSE)
+    error("NAbounds must be TRUE or NA");
+  const bool NAbounds = LOGICAL(NAboundsArg)[0]==TRUE;
   const bool verbose = GetVerbose();
 
-  int nprotect = 0;
-  bool integer=true;
-  bool integer64=false;
   if (isInteger(x)) {
     if ((isInteger(lower) || isRealReallyInt(lower)) &&
         (isInteger(upper) || isRealReallyInt(upper))) { // #3517 coerce to num to int when possible
@@ -32,133 +33,158 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {
       x = PROTECT(coerceVector(x, REALSXP)); nprotect++;
     }
   }
-  if (inherits(x,"integer64")) {
-    if (!inherits(lower,"integer64") || !inherits(upper,"integer64"))
-      error("Internal error in between: 'x' is integer64 while 'lower' and/or 'upper' are not, should have been caught by now"); // # nocov
-    integer=false;
-    integer64=true;
-  } else if (isReal(x)) {
-    integer=false;
-    if (!isReal(lower)) {
-      lower = PROTECT(coerceVector(lower, REALSXP)); nprotect++; // these coerces will convert NA appropriately
-    }
-    if (!isReal(upper)) {
-      upper = PROTECT(coerceVector(upper, REALSXP)); nprotect++;
-    }
-  } else if (!isInteger(x)) {
-    error("Internal error in between: 'x' is not int, double or int64, should have been caught by now"); // # nocov
+  if (TYPEOF(lower) != TYPEOF(x)) {
+    lower = PROTECT(coerceVector(lower, TYPEOF(x))); nprotect++;
+  }
+  if (TYPEOF(upper) != TYPEOF(x)) {
+    upper = PROTECT(coerceVector(upper, TYPEOF(x))); nprotect++;
   }
-  // TODO: sweep through lower and upper ensuring lower<=upper (inc bounds) and no lower>upper or lower==INT_MAX
 
   const bool recycleX =   nx==1;
   const bool recycleLow = nl==1;
   const bool recycleUpp = nu==1;
-  const bool open = !LOGICAL(bounds)[0];
+  const int xMask   = recycleX   ? 0 : INT_MAX;
+  const int lowMask = recycleLow ? 0 : INT_MAX;
+  const int uppMask = recycleUpp ? 0 : INT_MAX;
   SEXP ans = PROTECT(allocVector(LGLSXP, longest)); nprotect++;
   int *restrict ansp = LOGICAL(ans);
-  if (integer) {
+  double tic=omp_get_wtime();
+
+  switch (TYPEOF(x)) {
+  case INTSXP: {
     const int *lp = INTEGER(lower);
     const int *up = INTEGER(upper);
     const int *xp = INTEGER(x);
-    if (!recycleX && recycleLow && recycleUpp) {
-      const int l = lp[0] + open;  // +open so we can always use >= and <=.  NA_INTEGER+1 == -INT_MAX == INT_MIN+1 (so NA limit handled by this too)
-      const int u = up[0]==NA_INTEGER ? INT_MAX : up[0] - open;
-      if (verbose) tic = omp_get_wtime();
+    for (int i=0; i<longestBound; ++i) {
+      const int l=lp[i & lowMask], u=up[i & uppMask];
+      if (l!=NA_INTEGER && u!=NA_INTEGER && l>u)
+        error("Item %d of lower (%d) is greater than item %d of upper (%d)", (i&lowMask)+1, l, (i&uppMask)+1, u);
+    }
+    if (NAbounds) {  // default NAbounds==TRUE => NA bound means TRUE; i.e. asif lower=-Inf or upper==Inf)
       #pragma omp parallel for num_threads(getDTthreads())
-      for (int i=0; i<longest; i++) {
-        int elem = xp[i];
-        ansp[i] = elem==NA_INTEGER ? NA_LOGICAL : (l<=elem && elem<=u);
+      for (int i=0; i<longest; ++i) {
+        const int elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+        ansp[i] = elem==NA_INTEGER ? NA_LOGICAL : (l==NA_INTEGER || l+open<=elem) && (u==NA_INTEGER || elem<=u-open);
+        // +open so we can always use >= and <=.  NA_INTEGER+1 == -INT_MAX == INT_MIN+1 (so NA limit handled by this too)
       }
-      if (verbose) Rprintf("between parallel processing of integer with recycling took %8.3fs\n", omp_get_wtime()-tic);
-    }
-    else {
-      const int xMask = recycleX ? 0 : INT_MAX;
-      const int lowMask = recycleLow ? 0 : INT_MAX;
-      const int uppMask = recycleUpp ? 0 : INT_MAX;
-      if (verbose) tic = omp_get_wtime();
+    } else {
       #pragma omp parallel for num_threads(getDTthreads())
-      for (int i=0; i<longest; i++) {
-        int elem = xp[i & xMask];
-        int l = lp[i & lowMask] +open;
-        int u = up[i & uppMask];
-        u = (u==NA_INTEGER) ? INT_MAX : u-open;
-        ansp[i] = elem==NA_INTEGER ? NA_LOGICAL : (l<=elem && elem<=u);
+      for (int i=0; i<longest; ++i) {
+        const int elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+        if (elem==NA_INTEGER) { ansp[i]=NA_LOGICAL; continue; }
+        const bool lok = l!=NA_INTEGER, uok = u!=NA_INTEGER;
+        ansp[i] = (lok && uok) ? l+open<=elem && elem<=u-open : ((uok && elem>u-open) || (lok && elem<l+open)) ? FALSE : NA_LOGICAL;
       }
-      if (verbose) Rprintf("between parallel processing of integer took %8.3fs\n", omp_get_wtime()-tic);
     }
-  } else if (!integer64) {
-    // type real
-    const double *lp = REAL(lower);
-    const double *up = REAL(upper);
-    const double *xp = REAL(x);
-    if (!recycleX && recycleLow && recycleUpp) {
-      const double l = isnan(lp[0]) ? -INFINITY : lp[0];
-      const double u = isnan(up[0]) ?  INFINITY : up[0];
-      if (open) {
-        if (verbose) tic = omp_get_wtime();
+    if (verbose) Rprintf("between parallel processing of integer took %8.3fs\n", omp_get_wtime()-tic);
+  } break;
+
+  case REALSXP:
+    if (Rinherits(x, char_integer64)) {
+      if (!Rinherits(lower, char_integer64) || !Rinherits(upper, char_integer64))
+        error("x is integer64 but lower and/or upper are not."); // e.g. between(int64, character, character)
+      const int64_t *lp = (int64_t *)REAL(lower);
+      const int64_t *up = (int64_t *)REAL(upper);
+      const int64_t *xp = (int64_t *)REAL(x);
+      for (int i=0; i<longestBound; ++i) {
+        const int64_t l=lp[i & lowMask], u=up[i & uppMask];
+        if (l!=NA_INTEGER64 && u!=NA_INTEGER64 && l>u)
+          error("Item %d of lower (%lld) is greater than item %d of upper (%lld)", (i&lowMask)+1, l, (i&uppMask)+1, u);
+      }
+      if (NAbounds) {
         #pragma omp parallel for num_threads(getDTthreads())
-        for (int i=0; i<longest; i++) {
-          double elem = xp[i];
-          ansp[i] = isnan(elem) ? NA_LOGICAL : (l<elem && elem<u);
+        for (int i=0; i<longest; ++i) {
+          const int64_t elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+          ansp[i] = elem==NA_INTEGER64 ? NA_LOGICAL : (l==NA_INTEGER64 || l+open<=elem) && (u==NA_INTEGER64 || elem<=u-open);
         }
-        if (verbose) Rprintf("between parallel processing of double using open bounds with recycling took %8.3fs\n", omp_get_wtime()-tic);
       } else {
-        if (verbose) tic = omp_get_wtime();
         #pragma omp parallel for num_threads(getDTthreads())
-        for (int i=0; i<longest; i++) {
-          double elem = xp[i];
-          ansp[i] = isnan(elem) ? NA_LOGICAL : (l<=elem && elem<=u);
+        for (int i=0; i<longest; ++i) {
+          const int64_t elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+          if (elem==NA_INTEGER64) { ansp[i]=NA_LOGICAL; continue; }
+          const bool lok = l!=NA_INTEGER64, uok = u!=NA_INTEGER64;
+          ansp[i] = (lok && uok) ? l+open<=elem && elem<=u-open : ((uok && elem>u-open) || (lok && elem<l+open)) ? FALSE : NA_LOGICAL;
         }
-        if (verbose) Rprintf("between parallel processing of double using closed bounds with recycling took %8.3fs\n", omp_get_wtime()-tic);
       }
-    }
-    else {
-      const int xMask = recycleX ? 0 : INT_MAX;
-      const int lowMask = recycleLow ? 0 : INT_MAX;
-      const int uppMask = recycleUpp ? 0 : INT_MAX;
-      if (verbose) tic = omp_get_wtime();
-      #pragma omp parallel for num_threads(getDTthreads())
-      for (int i=0; i<longest; i++) {
-        double elem = xp[i & xMask];
-        double l = lp[i & lowMask];
-        double u = up[i & uppMask];
-        if (isnan(l)) l=-INFINITY;
-        if (isnan(u)) u= INFINITY;
-        ansp[i] = isnan(elem) ? NA_LOGICAL : (open ? l<elem && elem<u : l<=elem && elem<=u);
+      if (verbose) Rprintf("between parallel processing of integer64 took %8.3fs\n", omp_get_wtime()-tic);
+    } else {
+      if (Rinherits(lower, char_integer64) || Rinherits(upper, char_integer64))
+        error("x is not integer64 but lower and/or upper is integer64. Please align classes.");
+      const double *lp = REAL(lower);
+      const double *up = REAL(upper);
+      const double *xp = REAL(x);
+      for (int i=0; i<longestBound; ++i) {
+        const double l=lp[i & lowMask], u=up[i & uppMask];
+        if (!isnan(l) && !isnan(u) && l>u)
+          error("Item %d of lower (%f) is greater than item %d of upper (%f)", (i&lowMask)+1, l, (i&uppMask)+1, u);
       }
-      if (verbose) Rprintf("between parallel processing of double took %8.3fs\n", omp_get_wtime()-tic);
-    }
-  } else {
-    // type integer64
-    const int64_t *lp = (int64_t *)REAL(lower);
-    const int64_t *up = (int64_t *)REAL(upper);
-    const int64_t *xp = (int64_t *)REAL(x);
-    if (!recycleX && recycleLow && recycleUpp) {
-      const int64_t l = lp[0] + open; // +open as for int32 branch
-      const int64_t u = up[0]==NA_INTEGER64 ? MAX_INTEGER64 : up[0] - open;
-      if (verbose) tic = omp_get_wtime();
-      #pragma omp parallel for num_threads(getDTthreads())
-      for (int i=0; i<longest; i++) {
-        int64_t elem = xp[i];
-        ansp[i] = elem==NA_INTEGER64 ? NA_LOGICAL : (l<=elem && elem<=u);
+      if (open) {
+        if (NAbounds) {
+          #pragma omp parallel for num_threads(getDTthreads())
+          for (int i=0; i<longest; ++i) {
+            const double elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+            ansp[i] = isnan(elem) ? NA_LOGICAL : (isnan(l) || l<elem) && (isnan(u) || elem<u);
+          }
+        } else {
+          #pragma omp parallel for num_threads(getDTthreads())
+          for (int i=0; i<longest; ++i) {
+            const double elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+            if (isnan(elem)) { ansp[i]=NA_LOGICAL; continue; }
+            const bool lok = !isnan(l), uok = !isnan(u);
+            ansp[i] = (lok && uok) ? l<elem && elem<u : ((uok && elem>=u) || (lok && elem<=l)) ? FALSE : NA_LOGICAL;
+          }
+        }
+        if (verbose) Rprintf("between parallel processing of double with open bounds took %8.3fs\n", omp_get_wtime()-tic);
+      } else {
+        if (NAbounds) {
+          #pragma omp parallel for num_threads(getDTthreads())
+          for (int i=0; i<longest; ++i) {
+            const double elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+            ansp[i] = isnan(elem) ? NA_LOGICAL : (isnan(l) || l<=elem) && (isnan(u) || elem<=u);
+          }
+        } else {
+          #pragma omp parallel for num_threads(getDTthreads())
+          for (int i=0; i<longest; ++i) {
+            const double elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+            if (isnan(elem)) { ansp[i]=NA_LOGICAL; continue; }
+            const bool lok = !isnan(l), uok = !isnan(u);
+            ansp[i] = (lok && uok) ? l<=elem && elem<=u : ((uok && elem>u) || (lok && elem<l)) ? FALSE : NA_LOGICAL;
+          }
+        }
+        if (verbose) Rprintf("between parallel processing of double with closed bounds took %8.3fs\n", omp_get_wtime()-tic);
       }
-      if (verbose) Rprintf("between parallel processing of integer64 with recycling took %8.3fs\n", omp_get_wtime()-tic);
     }
-    else {
-      const int xMask = recycleX ? 0 : INT_MAX;
-      const int lowMask = recycleLow ? 0 : INT_MAX;
-      const int uppMask = recycleUpp ? 0 : INT_MAX;
-      if (verbose) tic = omp_get_wtime();
-      #pragma omp parallel for num_threads(getDTthreads())
-      for (int i=0; i<longest; i++) {
-        int64_t elem = xp[i & xMask];
-        int64_t l = lp[i & lowMask] +open;
-        int64_t u = up[i & uppMask];
-        u = u==NA_INTEGER64 ? MAX_INTEGER64 : u-open;
-        ansp[i] = elem==NA_INTEGER64 ? NA_LOGICAL : (l<=elem && elem<=u);
+    break;
+
+  case STRSXP: {
+    const SEXP *lp = STRING_PTR(lower);
+    const SEXP *up = STRING_PTR(upper);
+    const SEXP *xp = STRING_PTR(x);
+    #define LCMP (strcmp(CHAR(ENC2UTF8(l)),CHAR(ENC2UTF8(elem)))<=-open)
+    #define UCMP (strcmp(CHAR(ENC2UTF8(elem)),CHAR(ENC2UTF8(u)))<=-open)
+    // TODO if all ascii can be parallel, otherwise ENC2UTF8 could allocate
+    for (int i=0; i<longestBound; ++i) {
+      const SEXP l=lp[i & lowMask], u=up[i & uppMask];
+      if (l!=NA_STRING && u!=NA_STRING && l!=u && strcmp(CHAR(ENC2UTF8(l)), CHAR(ENC2UTF8(u)))>0)
+        error("Item %d of lower ('%s') is greater than item %d of upper ('%s')", (i&lowMask)+1, CHAR(l), (i&uppMask)+1, CHAR(u));
+    }
+    if (NAbounds) {
+      for (int i=0; i<longest; ++i) {
+        const SEXP elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+        ansp[i] = elem==NA_STRING ? NA_LOGICAL : (l==NA_STRING || LCMP) && (u==NA_STRING || UCMP);
+      }
+    } else {
+      for (int i=0; i<longest; ++i) {
+        const SEXP elem=xp[i & xMask], l=lp[i & lowMask], u=up[i & uppMask];
+        if (elem==NA_STRING) { ansp[i] = NA_LOGICAL; continue; }
+        const bool lok=(l!=NA_STRING), uok=(u!=NA_STRING);
+        ansp[i] = (lok && uok) ? LCMP && UCMP : ((uok && !UCMP) || (lok && !LCMP)) ? FALSE : NA_LOGICAL;
       }
-      if (verbose) Rprintf("between parallel processing of integer64 took %8.3fs\n", omp_get_wtime()-tic);
     }
+    if (verbose) Rprintf("between non-parallel processing of character took %8.3fs\n", omp_get_wtime()-tic);
+  } break;
+  default:
+    error("Internal error: between.c unsupported type '%s' should have been caught at R level", type2char(TYPEOF(x)));  // # nocov
   }
   UNPROTECT(nprotect);
   return ans;
diff --git a/src/data.table.h b/src/data.table.h
index 692969fca6..b54087697b 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -86,7 +86,7 @@ SEXP sym_starts, char_starts;
 SEXP sym_maxgrpn;
 SEXP sym_colClassesAs;
 SEXP sym_verbose;
-bool INHERITS(SEXP x, SEXP char_);
+SEXP sym_inherits;
 long long DtoLL(double x);
 double LLtoD(long long x);
 bool GetVerbose();
@@ -202,7 +202,7 @@ void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill
 SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP inplace, SEXP cols, SEXP verbose);
 
 // between.c
-SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds);
+SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds);
 
 // coalesce.c
 SEXP coalesce(SEXP x, SEXP inplace);
@@ -213,3 +213,6 @@ SEXP isReallyReal(SEXP x);
 SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups);
 void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill);
 SEXP coerceFillR(SEXP fill);
+bool INHERITS(SEXP x, SEXP char_);
+bool Rinherits(SEXP x, SEXP char_);
+
diff --git a/src/init.c b/src/init.c
index 1148fc0529..378743a19a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -299,29 +299,12 @@ void attribute_visible R_init_datatable(DllInfo *info)
   sym_colClassesAs = install("colClassesAs");
   sym_verbose = install("datatable.verbose");
   SelfRefSymbol = install(".internal.selfref");
+  sym_inherits = install("inherits");
 
   initDTthreads();
   avoid_openmp_hang_within_fork();
 }
 
-inline bool INHERITS(SEXP x, SEXP char_) {
-  // Thread safe inherits() by pre-calling install() above in init first then
-  // passing those char_* in here for simple and fast non-API pointer compare.
-  // The thread-safety aspect here is only currently actually needed for list columns in
-  // fwrite() where the class of the cell's vector is tested; the class of the column
-  // itself is pre-stored by fwrite (for example in isInteger64[] and isITime[]).
-  // Thread safe in the limited sense of correct and intended usage :
-  // i) no API call such as install() or mkChar() must be passed in.
-  // ii) no attrib writes must be possible in other threads.
-  SEXP klass;
-  if (isString(klass = getAttrib(x, R_ClassSymbol))) {
-    for (int i=0; i<LENGTH(klass); i++) {
-      if (STRING_ELT(klass, i) == char_) return true;
-    }
-  }
-  return false;
-}
-
 inline long long DtoLL(double x) {
   // Type punning such as
   //     *(long long *)&REAL(column)[i]
diff --git a/src/utils.c b/src/utils.c
index cbe49d9967..f17535c318 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -140,3 +140,36 @@ SEXP coerceFillR(SEXP fill) {
   UNPROTECT(protecti);
   return ans;
 }
+
+inline bool INHERITS(SEXP x, SEXP char_) {
+  // Thread safe inherits() by pre-calling install() in init.c and then
+  // passing those char_* in here for simple and fast non-API pointer compare.
+  // The thread-safety aspect here is only currently actually needed for list columns in
+  // fwrite() where the class of the cell's vector is tested; the class of the column
+  // itself is pre-stored by fwrite (for example in isInteger64[] and isITime[]).
+  // Thread safe in the limited sense of correct and intended usage :
+  // i) no API call such as install() or mkChar() must be passed in.
+  // ii) no attrib writes must be possible in other threads.
+  SEXP klass;
+  if (isString(klass = getAttrib(x, R_ClassSymbol))) {
+    for (int i=0; i<LENGTH(klass); i++) {
+      if (STRING_ELT(klass, i) == char_) return true;
+    }
+  }
+  return false;
+}
+
+bool Rinherits(SEXP x, SEXP char_) {
+ // motivation was nanotime which is S4 and inherits from integer64 via S3 extends
+ // R's C API inherits() does not cover S4 and returns FALSE for nanotime, as does our own INHERITS above.
+ // R's R-level inherits() calls objects.c:inherits2 which calls attrib.c:R_data_class2 and
+ // then attrib.c:S4_extends which itself calls R level methods:::.extendsForS3 which then calls R level methods::extends.
+ // Since that chain of calls is so complicated and involves evaluating R level anyway, let's just reuse it.
+ // Rinherits prefix with 'R' to signify i) it calls R level and is not thread safe, and ii) is the R level inherits which covers S4.
+ SEXP vec = PROTECT(ScalarString(char_));
+ SEXP call = PROTECT(lang3(sym_inherits, x, vec));
+ bool ans = LOGICAL(eval(call, R_GlobalEnv))[0]==1;
+ UNPROTECT(2);
+ return ans;
+}
+