From 5ecbe8eb1f722ad0eea0f44b272536871de5b0f1 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 16 Dec 2020 16:09:33 -0800
Subject: [PATCH 1/9] Start adding support for arithmetic functions in R

---
 r/R/expression.R                      | 25 ++++++++++++
 r/tests/testthat/test-compute-arith.R | 57 +++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 r/tests/testthat/test-compute-arith.R

diff --git a/r/R/expression.R b/r/R/expression.R
index f9e09c2fadd..046b4f11ee6 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -59,6 +59,20 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   } else {
     e1 <- .wrap_arrow(e1, .Generic, e2$type)
     e2 <- .wrap_arrow(e2, .Generic, e1$type)
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: cast needs to be an expression
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- e1$cast(float64())
+      e2 <- e2$cast(float64())
+    } else if (.Generic == "%/%") {
+      e1 <- e1$cast(int32())
+      e2 <- e2$cast(int32())
+    } else if (.Generic == "%%") {
+      # e1 - e1 %/% e2
+    }
     expr <- array_expression(.binary_function_map[[.Generic]], e1, e2, ...)
   }
   expr
@@ -91,9 +105,20 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   "<=" = "less_equal",
   "&" = "and_kleene",
   "|" = "or_kleene",
+  "+" = "add",
+  "-" = "subtract",
+  "*" = "multiply",
+  "/" = "divide",
+  "%/%" = "divide",
   "%in%" = "is_in_meta_binary"
 )
 
+
+# ‘"^"’,
+# ‘"%%"’,
+# ‘"%/%"’
+
+
 .array_function_map <- c(.unary_function_map, .binary_function_map)
 
 eval_array_expression <- function(x) {
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
new file mode 100644
index 00000000000..633cc57af1b
--- /dev/null
+++ b/r/tests/testthat/test-compute-arith.R
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# TODO:
+# * Use _checked variations? See what R does
+# * More tests for edge cases, esp. with division; add test helpers here?
+# * Is there a better "autocasting" solution? See what rules C++ Datasets do
+# * test-dplyr tests
+# * then, dataset tests, special casing for division
+
+test_that("Addition", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_type_equal(a, int32())
+  expect_type_equal(a + 4, int32())
+  expect_equal(a + 4, Array$create(c(5:8, NA_integer_)))
+  expect_identical(as.vector(a + 4), c(5:8, NA_integer_))
+  expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
+  expect_vector(a + 4L, c(5:8, NA_integer_))
+  expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
+  skip("autocasting should happen in compute kernels; R workaround fails on this")
+  expect_type_equal(a + 4.1, float64())
+  expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
+})
+
+test_that("Subtraction", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a - 3, Array$create(c(-2:1, NA_integer_)))
+})
+
+test_that("Multiplication", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a * 2, Array$create(c(1:4 * 2L, NA_integer_)))
+})
+
+test_that("Division", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a / 2, Array$create(c(1:4 / 2, NA_real_)))
+  expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+
+  b <- a$cast(float64())
+  expect_equal(b / 2, Array$create(c(1:4 / 2, NA_real_)))
+  expect_equal(b %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+})

From 735a484077c4435b75a397d4c888fd0c99eaad27 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 23 Dec 2020 11:23:10 -0600
Subject: [PATCH 2/9] Adjust some of the casting, datasets not (yet) working

---
 r/R/expression.R                      | 36 +++++++++++++++-------
 r/src/compute.cpp                     | 12 ++++++++
 r/tests/testthat/test-compute-arith.R | 20 +++++++++++--
 r/tests/testthat/test-dataset.R       | 43 +++++++++++++++++++++++++++
 r/tests/testthat/test-dplyr.R         | 42 ++++++++++++++++++++++++++
 5 files changed, 140 insertions(+), 13 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index 046b4f11ee6..817941a1caa 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -63,15 +63,15 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
     # In Arrow, "divide" is one function, which does integer division on
     # integer inputs and floating-point division on floats
     if (.Generic == "/") {
-      # TODO: cast needs to be an expression
       # TODO: omg so many ways it's wrong to assume these types
-      e1 <- e1$cast(float64())
-      e2 <- e2$cast(float64())
+      e1 <- array_expression("cast", e1, options = list(to_type = float64()))
+      e2 <- array_expression("cast", e2, options = list(to_type = float64()))
     } else if (.Generic == "%/%") {
-      e1 <- e1$cast(int32())
-      e2 <- e2$cast(int32())
+      e1 <- array_expression("cast", e1, options = list(to_type = float64()))
+      e2 <- array_expression("cast", e2, options = list(to_type = float64()))
+      return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32())))
     } else if (.Generic == "%%") {
-      # e1 - e1 %/% e2
+      # e1 - e2 * ( e1 %/% e2 )
     }
     expr <- array_expression(.binary_function_map[[.Generic]], e1, e2, ...)
   }
@@ -105,11 +105,11 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   "<=" = "less_equal",
   "&" = "and_kleene",
   "|" = "or_kleene",
-  "+" = "add",
-  "-" = "subtract",
-  "*" = "multiply",
-  "/" = "divide",
-  "%/%" = "divide",
+  "+" = "add_checked",
+  "-" = "subtract_checked",
+  "*" = "multiply_checked",
+  "/" = "divide_checked",
+  "%/%" = "divide_checked",
   "%in%" = "is_in_meta_binary"
 )
 
@@ -221,6 +221,20 @@ build_dataset_expression <- function(.Generic, e1, e2, ...) {
     if (!inherits(e2, "Expression")) {
       e2 <- Expression$scalar(e2)
     }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- array_expression("cast", e1, options = list(to_type = float64()))
+      e2 <- array_expression("cast", e2, options = list(to_type = float64()))
+    } else if (.Generic == "%/%") {
+      e1 <- array_expression("cast", e1, options = list(to_type = int32()))
+      e2 <- array_expression("cast", e2, options = list(to_type = int32()))
+    } else if (.Generic == "%%") {
+      # e1 - e2 * ( e1 %/% e2 )
+    }
+
     expr <- Expression$create(.binary_function_map[[.Generic]], e1, e2, ...)
   }
   expr
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 8a75a251d8e..c44d1674153 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -185,6 +185,18 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      cpp11::as_cpp<bool>(options["skip_nulls"]));
   }
 
+  // hacky attempt to pass through to_type
+  if (func_name == "cast") {
+    using Options = arrow::compute::CastOptions;
+    auto out = std::make_shared<Options>(false);
+    SEXP to_type = options["to_type"];
+    if (!Rf_isNull(to_type) && cpp11::as_cpp<std::shared_ptr<arrow::DataType>>(to_type)) {
+      out->to_type = cpp11::as_cpp<std::shared_ptr<arrow::DataType>>(to_type);
+      }
+    return out;
+
+  }
+
   return nullptr;
 }
 
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 633cc57af1b..27d78dd090d 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -16,10 +16,10 @@
 # under the License.
 
 # TODO:
-# * Use _checked variations? See what R does
 # * More tests for edge cases, esp. with division; add test helpers here?
 # * Is there a better "autocasting" solution? See what rules C++ Datasets do
-# * test-dplyr tests
+# * test-dplyr tests (Added one addition, and one summarize, but check to see if
+# we can make summarize route through arrow need more?)
 # * then, dataset tests, special casing for division
 
 test_that("Addition", {
@@ -31,6 +31,12 @@ test_that("Addition", {
   expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
   expect_vector(a + 4L, c(5:8, NA_integer_))
   expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
+
+  # overflow errors — this is slightly different from R's `NA` coercion when
+  # overflowing, but better than the alternative of silently restarting
+  casted <- a$cast(int8())
+  expect_error(casted + 257)
+
   skip("autocasting should happen in compute kernels; R workaround fails on this")
   expect_type_equal(a + 4.1, float64())
   expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
@@ -50,8 +56,18 @@ test_that("Division", {
   a <- Array$create(c(1:4, NA_integer_))
   expect_equal(a / 2, Array$create(c(1:4 / 2, NA_real_)))
   expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+  expect_equal(a / 2 / 2, Array$create(c(1:4 / 2 / 2, NA_real_)))
+  expect_equal(a %/% 2 %/% 2, Array$create(c(0L, 0L, 0L, 1L, NA_integer_)))
 
   b <- a$cast(float64())
   expect_equal(b / 2, Array$create(c(1:4 / 2, NA_real_)))
   expect_equal(b %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+
+  # the behavior of %/% matches R's (i.e. the integer of the quotient, not
+  # simply dividing two integers)
+  expect_equal(b / 2.2, Array$create(c(1:4 / 2.2, NA_real_)))
+  # c(1:4) %/% 2.2 != c(1:4) %/% as.integer(2.2)
+  # c(1:4) %/% 2.2             == c(0L, 0L, 1L, 1L)
+  # c(1:4) %/% as.integer(2.2) == c(0L, 1L, 1L, 2L)
+  expect_equal(b %/% 2.2, Array$create(c(0L, 0L, 1L, 1L, NA_integer_)))
 })
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 4c8db531411..e617a29cbd7 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -523,6 +523,49 @@ test_that("filter() on date32 columns", {
   )
 })
 
+test_that("filter() with expressions", {
+  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+  expect_is(ds$format, "ParquetFileFormat")
+  expect_is(ds$filesystem, "LocalFileSystem")
+  expect_is(ds, "Dataset")
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl) %>%
+      filter(dbl * 2 > 14 & dbl - 50 < 3L) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl")],
+      df2[1:2, c("chr", "dbl")]
+    )
+  )
+
+  # check division's special casing.
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl) %>%
+      filter(dbl / 2 > 3.5 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl")],
+      df2[1:2, c("chr", "dbl")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl) %>%
+      filter(dbl / 2L > 3.5 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl")],
+      df2[1:2, c("chr", "dbl")]
+    )
+  )
+})
+
 test_that("filter scalar validation doesn't crash (ARROW-7772)", {
   expect_error(
     ds %>%
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index f9a01d8ceb6..d5c8cb9cd8d 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -133,6 +133,40 @@ test_that("filtering with expression", {
   )
 })
 
+test_that("filtering with arithmetic", {
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl + 1 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl %/% 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+})
+
 test_that("More complex select/filter", {
   expect_dplyr_equal(
     input %>%
@@ -276,6 +310,14 @@ test_that("summarize", {
       summarize(min_int = min(int)),
     tbl
   )
+
+  expect_dplyr_equal(
+    input %>%
+      select(int, chr) %>%
+      filter(int > 5) %>%
+      summarize(min_int = min(int) / 2),
+    tbl
+  )
 })
 
 test_that("mutate", {

From 4b561bff85684df406ea1406ff1c9fdd47335f63 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 23 Dec 2020 15:43:03 -0600
Subject: [PATCH 3/9] expresion-based casting for division

---
 r/R/expression.R                      | 57 +++++++++++++++++++++------
 r/src/compute.cpp                     | 23 +++++++++--
 r/tests/testthat/test-compute-arith.R |  4 ++
 r/tests/testthat/test-dataset.R       | 12 ------
 r/tests/testthat/test-dplyr.R         |  4 +-
 r/tests/testthat/test-expression.R    | 19 +++++++++
 6 files changed, 91 insertions(+), 28 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index 817941a1caa..d5ed4c845e9 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -69,9 +69,26 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
     } else if (.Generic == "%/%") {
       e1 <- array_expression("cast", e1, options = list(to_type = float64()))
       e2 <- array_expression("cast", e2, options = list(to_type = float64()))
-      return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32())))
+      return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
     } else if (.Generic == "%%") {
-      # e1 - e2 * ( e1 %/% e2 )
+      # {e1 - e2 * ( e1 %/% e2 )}
+      # TODO: there has to be a way to use the form ^^^ instead of this.
+      out <- array_expression(
+        "subtract_checked", e1, array_expression(
+          "multiply_checked", e2, array_expression(
+            # this outer cast is to ensure that the result of this and the
+            # result of multiply are the same
+            "cast",
+            array_expression(
+              "cast",
+              array_expression(.binary_function_map[[.Generic]], e1, e2, ...),
+              options = list(to_type = int32(), allow_float_truncate = TRUE)
+            ),
+            options = list(to_type = e2$type, allow_float_truncate = TRUE)
+          )
+        )
+      )
+      return(out)
     }
     expr <- array_expression(.binary_function_map[[.Generic]], e1, e2, ...)
   }
@@ -110,13 +127,13 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   "*" = "multiply_checked",
   "/" = "divide_checked",
   "%/%" = "divide_checked",
-  "%in%" = "is_in_meta_binary"
+  "%in%" = "is_in_meta_binary",
+  "%%" = "divide_checked"
 )
 
 
-# ‘"^"’,
-# ‘"%%"’,
-# ‘"%/%"’
+# ‘"^"’
+
 
 
 .array_function_map <- c(.unary_function_map, .binary_function_map)
@@ -226,13 +243,31 @@ build_dataset_expression <- function(.Generic, e1, e2, ...) {
     # integer inputs and floating-point division on floats
     if (.Generic == "/") {
       # TODO: omg so many ways it's wrong to assume these types
-      e1 <- array_expression("cast", e1, options = list(to_type = float64()))
-      e2 <- array_expression("cast", e2, options = list(to_type = float64()))
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
     } else if (.Generic == "%/%") {
-      e1 <- array_expression("cast", e1, options = list(to_type = int32()))
-      e2 <- array_expression("cast", e2, options = list(to_type = int32()))
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+      return(Expression$create("cast", Expression$create(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
     } else if (.Generic == "%%") {
-      # e1 - e2 * ( e1 %/% e2 )
+      # {e1 - e2 * ( e1 %/% e2 )}
+      # TODO: there has to be a way to use the form ^^^ instead of this.
+      out <- Expression$create(
+        "subtract_checked", e1, Expression$create(
+          "multiply_checked", e2, Expression$create(
+            # this outer cast is to ensure that the result of this and the
+            # result of multiply are the same
+            "cast",
+            Expression$create(
+              "cast",
+              Expression$create(.binary_function_map[[.Generic]], e1, e2, ...),
+              options = list(to_type = int32(), allow_float_truncate = TRUE)
+            ),
+            options = list(to_type = e2$type, allow_float_truncate = TRUE)
+          )
+        )
+      )
+      return(out)
     }
 
     expr <- Expression$create(.binary_function_map[[.Generic]], e1, e2, ...)
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index c44d1674153..4497f5b59a3 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -185,16 +185,31 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      cpp11::as_cpp<bool>(options["skip_nulls"]));
   }
 
-  // hacky attempt to pass through to_type
+  // hacky attempt to pass through to_type and other options
   if (func_name == "cast") {
     using Options = arrow::compute::CastOptions;
-    auto out = std::make_shared<Options>(false);
+    auto out = std::make_shared<Options>(true);
     SEXP to_type = options["to_type"];
     if (!Rf_isNull(to_type) && cpp11::as_cpp<std::shared_ptr<arrow::DataType>>(to_type)) {
       out->to_type = cpp11::as_cpp<std::shared_ptr<arrow::DataType>>(to_type);
-      }
-    return out;
+    }
 
+    SEXP allow_float_truncate = options["allow_float_truncate"];
+    if (!Rf_isNull(allow_float_truncate) && cpp11::as_cpp<bool>(allow_float_truncate)) {
+      out->allow_float_truncate = cpp11::as_cpp<bool>(allow_float_truncate);
+    }
+
+    SEXP allow_time_truncate = options["allow_time_truncate"];
+    if (!Rf_isNull(allow_time_truncate) && cpp11::as_cpp<bool>(allow_time_truncate)) {
+      out->allow_time_truncate = cpp11::as_cpp<bool>(allow_time_truncate);
+    }
+
+    SEXP allow_int_overflow = options["allow_int_overflow"];
+    if (!Rf_isNull(allow_int_overflow) && cpp11::as_cpp<bool>(allow_int_overflow)) {
+      out->allow_int_overflow = cpp11::as_cpp<bool>(allow_int_overflow);
+    }
+
+    return out;
   }
 
   return nullptr;
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 27d78dd090d..829eaae5796 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -70,4 +70,8 @@ test_that("Division", {
   # c(1:4) %/% 2.2             == c(0L, 0L, 1L, 1L)
   # c(1:4) %/% as.integer(2.2) == c(0L, 1L, 1L, 2L)
   expect_equal(b %/% 2.2, Array$create(c(0L, 0L, 1L, 1L, NA_integer_)))
+
+  expect_equal(a %% 2, Array$create(c(1L, 0L, 1L, 0L, NA_integer_)))
+
+  expect_equal(b %% 2, Array$create(c(1:4 %% 2, NA_real_)))
 })
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index e617a29cbd7..1125e83cd8d 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -552,18 +552,6 @@ test_that("filter() with expressions", {
       df2[1:2, c("chr", "dbl")]
     )
   )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl) %>%
-      filter(dbl / 2L > 3.5 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl")],
-      df2[1:2, c("chr", "dbl")]
-    )
-  )
 })
 
 test_that("filter scalar validation doesn't crash (ARROW-7772)", {
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index d5c8cb9cd8d..79317a9b201 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -163,7 +163,9 @@ test_that("filtering with arithmetic", {
       filter(dbl %/% 2 > 3) %>%
       select(string = chr, int, dbl) %>%
       collect(),
-    tbl
+    tbl,
+    # TODO: why are record batched versions problematic?
+    skip_record_batch = "record batches aren't (auto?) casting correctly"
   )
 })
 
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index 0c5ef4c12da..1251cf0e7c5 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -66,3 +66,22 @@ test_that("C++ expressions", {
   # Interprets that as a list type
   expect_is(f == c(1L, 2L), "Expression")
 })
+
+test_that("Can create an expression", {
+  a <- Array$create(as.numeric(1:5))
+  expr <- array_expression("cast", a, options = list(to_type = int32()))
+  expect_is(expr, "array_expression")
+  expect_equal(eval_array_expression(expr), Array$create(1:5))
+
+  b <- Array$create(0.5:4.5)
+  bad_expr <- array_expression("cast", b, options = list(to_type = int32()))
+  expect_is(bad_expr, "array_expression")
+  expect_error(
+    eval_array_expression(bad_expr),
+    "Invalid: Float value .* was truncated converting"
+  )
+  expr <- array_expression("cast", b, options = list(to_type = int32(), allow_float_truncate = TRUE))
+  expect_is(expr, "array_expression")
+  expect_equal(eval_array_expression(expr), Array$create(0:4))
+})
+

From 3a6a545024f5c5a0547d770a2c845d7a599f9582 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 30 Dec 2020 12:27:18 -0600
Subject: [PATCH 4/9] PR comments

---
 r/R/expression.R                      | 63 ++++++++++++---------------
 r/tests/testthat/test-compute-arith.R | 24 ++++++----
 r/tests/testthat/test-dataset.R       | 61 ++++++++++++++++++++++++++
 r/tests/testthat/test-dplyr.R         | 50 ++++++++++++++++++---
 4 files changed, 150 insertions(+), 48 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index d5ed4c845e9..673225ea15a 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -64,15 +64,15 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
     # integer inputs and floating-point division on floats
     if (.Generic == "/") {
       # TODO: omg so many ways it's wrong to assume these types
-      e1 <- array_expression("cast", e1, options = list(to_type = float64()))
-      e2 <- array_expression("cast", e2, options = list(to_type = float64()))
+      e1 <- e1$cast(float64())
+      e2 <- e2$cast(float64())
     } else if (.Generic == "%/%") {
-      e1 <- array_expression("cast", e1, options = list(to_type = float64()))
-      e2 <- array_expression("cast", e2, options = list(to_type = float64()))
       return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
     } else if (.Generic == "%%") {
       # {e1 - e2 * ( e1 %/% e2 )}
       # TODO: there has to be a way to use the form ^^^ instead of this.
+      # with return(e1 - e2 * (e1 %/% e2)) we get:
+      # "cannot add bindings to a locked environment"
       out <- array_expression(
         "subtract_checked", e1, array_expression(
           "multiply_checked", e2, array_expression(
@@ -90,6 +90,13 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
       )
       return(out)
     }
+
+    # hack to use subtract instead of subtract_checked for timestamps
+    if (inherits(e1$type, "Timestamp") && inherits(e2$type, "Timestamp") && .Generic == "-"){
+      # don't use the checked variant for timestamp
+      return(array_expression("subtract", e1, e2, ...))
+    }
+
     expr <- array_expression(.binary_function_map[[.Generic]], e1, e2, ...)
   }
   expr
@@ -127,15 +134,13 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   "*" = "multiply_checked",
   "/" = "divide_checked",
   "%/%" = "divide_checked",
-  "%in%" = "is_in_meta_binary",
-  "%%" = "divide_checked"
+  # we don't actually use divide_checked with `%%`, rather it is rewritten to
+  # use %/% above.
+  "%%" = "divide_checked",
+  # TODO: "^"  (ARROW-11070)
+  "%in%" = "is_in_meta_binary"
 )
 
-
-# ‘"^"’
-
-
-
 .array_function_map <- c(.unary_function_map, .binary_function_map)
 
 eval_array_expression <- function(x) {
@@ -202,7 +207,10 @@ print.array_expression <- function(x, ...) {
 #' @export
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
-    ToString = function() dataset___expr__ToString(self)
+    ToString = function() dataset___expr__ToString(self),
+    cast = function(to_type, ...) {
+      Expression$create("cast", self, options = list(to_type = to_type, ...))
+    }
   )
 )
 Expression$create <- function(function_name,
@@ -243,31 +251,16 @@ build_dataset_expression <- function(.Generic, e1, e2, ...) {
     # integer inputs and floating-point division on floats
     if (.Generic == "/") {
       # TODO: omg so many ways it's wrong to assume these types
-      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
-      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+      e1 <- e1$cast(float64())
+      e2 <- e2$cast(float64())
     } else if (.Generic == "%/%") {
-      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
-      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
-      return(Expression$create("cast", Expression$create(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
+      # In R, integer division works like floor(float division)
+      out <- build_dataset_expression("/", e1, e2)
+      return(out$cast(int32(), allow_float_truncate = TRUE))
     } else if (.Generic == "%%") {
-      # {e1 - e2 * ( e1 %/% e2 )}
-      # TODO: there has to be a way to use the form ^^^ instead of this.
-      out <- Expression$create(
-        "subtract_checked", e1, Expression$create(
-          "multiply_checked", e2, Expression$create(
-            # this outer cast is to ensure that the result of this and the
-            # result of multiply are the same
-            "cast",
-            Expression$create(
-              "cast",
-              Expression$create(.binary_function_map[[.Generic]], e1, e2, ...),
-              options = list(to_type = int32(), allow_float_truncate = TRUE)
-            ),
-            options = list(to_type = e2$type, allow_float_truncate = TRUE)
-          )
-        )
-      )
-      return(out)
+      # TODO: need to do something with types to ensure that e2 is compatible
+      # with e1 %/% e2 and e1.
+      return(e1 - e2 * ( e1 %/% e2 ))
     }
 
     expr <- Expression$create(.binary_function_map[[.Generic]], e1, e2, ...)
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 829eaae5796..7ac5bd9016c 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -15,13 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# TODO:
-# * More tests for edge cases, esp. with division; add test helpers here?
-# * Is there a better "autocasting" solution? See what rules C++ Datasets do
-# * test-dplyr tests (Added one addition, and one summarize, but check to see if
-# we can make summarize route through arrow need more?)
-# * then, dataset tests, special casing for division
-
 test_that("Addition", {
   a <- Array$create(c(1:4, NA_integer_))
   expect_type_equal(a, int32())
@@ -37,7 +30,7 @@ test_that("Addition", {
   casted <- a$cast(int8())
   expect_error(casted + 257)
 
-  skip("autocasting should happen in compute kernels; R workaround fails on this")
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
   expect_type_equal(a + 4.1, float64())
   expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
 })
@@ -75,3 +68,18 @@ test_that("Division", {
 
   expect_equal(b %% 2, Array$create(c(1:4 %% 2, NA_real_)))
 })
+
+test_that("Dates casting", {
+  a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
+})
+
+test_that("Datetimes", {
+  a <- Array$create(c(Sys.time() + 1:4, NA_integer_))
+  b <- Scalar$create(Sys.time())
+  result <- a - b
+  expect_is(result$type, "DataType")
+  expect_identical(result$type$ToString(), "duration[us]")
+})
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 1125e83cd8d..7b127b6e515 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -552,6 +552,67 @@ test_that("filter() with expressions", {
       df2[1:2, c("chr", "dbl")]
     )
   )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %/% 2L > 3 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl", "int")],
+      df2[1:2, c("chr", "dbl", "int")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %/% 2 > 3 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl", "int")],
+      df2[1:2, c("chr", "dbl", "int")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %% 2L > 0 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
+      df2[1, c("chr", "dbl", "int")]
+    )
+  )
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %% 2 > 0 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
+      df2[1, c("chr", "dbl", "int")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(dbl + int > 15 & dbl < 53L) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl", "int")],
+      df2[1:2, c("chr", "dbl", "int")]
+    )
+  )
 })
 
 test_that("filter scalar validation doesn't crash (ARROW-7772)", {
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 79317a9b201..f98bda0e82a 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -27,6 +27,8 @@ expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its star
   expr <- rlang::enquo(expr)
   expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl)))
 
+  skip_msg <- NULL
+
   if (is.null(skip_record_batch)) {
     via_batch <- rlang::eval_tidy(
       expr,
@@ -34,7 +36,7 @@ expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its star
     )
     expect_equivalent(via_batch, expected, ...)
   } else {
-    skip(skip_record_batch)
+    skip_msg <- c(skip_msg, skip_record_batch)
   }
 
   if (is.null(skip_table)) {
@@ -44,7 +46,11 @@ expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its star
     )
     expect_equivalent(via_table, expected, ...)
   } else {
-    skip(skip_table)
+    skip_msg <- c(skip_msg, skip_table)
+  }
+
+  if (!is.null(skip_msg)) {
+    skip(paste(skip_msg, collpase = "\n"))
   }
 }
 
@@ -158,14 +164,48 @@ test_that("filtering with arithmetic", {
     tbl
   )
 
+  expect_dplyr_equal(
+    input %>%
+      filter(int / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
   expect_dplyr_equal(
     input %>%
       filter(dbl %/% 2 > 3) %>%
       select(string = chr, int, dbl) %>%
       collect(),
-    tbl,
-    # TODO: why are record batched versions problematic?
-    skip_record_batch = "record batches aren't (auto?) casting correctly"
+    tbl
+  )
+})
+
+test_that("filtering with expression + autocasting", {
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl + 1 > 3L) %>% # test autocasting with comparison to 3L
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int + 1 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
   )
 })
 

From 7cf15d4e5e4175a133ad0da4803440d29032748a Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 30 Dec 2020 13:47:00 -0800
Subject: [PATCH 5/9] Move array/scalar autocasting to eval time

---
 r/R/arrowExports.R                    |  4 --
 r/R/expression.R                      | 76 ++++++++++++++++-----------
 r/R/scalar.R                          | 10 +++-
 r/src/arrowExports.cpp                | 19 +------
 r/src/scalar.cpp                      |  6 ---
 r/tests/testthat/test-compute-arith.R |  3 +-
 r/tests/testthat/test-dataset.R       | 17 +++++-
 r/tests/testthat/test-dplyr.R         |  4 +-
 r/tests/testthat/test-expression.R    |  3 +-
 9 files changed, 74 insertions(+), 68 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 11fd99b2321..c41ef33cb69 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1412,10 +1412,6 @@ Scalar__ToString <- function(s){
     .Call(`_arrow_Scalar__ToString` , s)
 }
 
-Scalar__CastTo <- function(s, t){
-    .Call(`_arrow_Scalar__CastTo` , s, t)
-}
-
 StructScalar__field <- function(s, i){
     .Call(`_arrow_StructScalar__field` , s, i)
 }
diff --git a/r/R/expression.R b/r/R/expression.R
index 673225ea15a..bd944a27a59 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -57,38 +57,28 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   if (.Generic %in% names(.unary_function_map)) {
     expr <- array_expression(.unary_function_map[[.Generic]], e1)
   } else {
-    e1 <- .wrap_arrow(e1, .Generic, e2$type)
-    e2 <- .wrap_arrow(e2, .Generic, e1$type)
+    e1 <- .wrap_arrow(e1, .Generic)
+    e2 <- .wrap_arrow(e2, .Generic)
 
     # In Arrow, "divide" is one function, which does integer division on
     # integer inputs and floating-point division on floats
     if (.Generic == "/") {
       # TODO: omg so many ways it's wrong to assume these types
-      e1 <- e1$cast(float64())
-      e2 <- e2$cast(float64())
+      e1 <- cast_array_expression(e1, float64())
+      e2 <- cast_array_expression(e2, float64())
     } else if (.Generic == "%/%") {
-      return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
+      # In R, integer division works like floor(float division)
+      out <- build_array_expression("/", e1, e2)
+      return(cast_array_expression(out, int32(), allow_float_truncate = TRUE))
     } else if (.Generic == "%%") {
       # {e1 - e2 * ( e1 %/% e2 )}
-      # TODO: there has to be a way to use the form ^^^ instead of this.
-      # with return(e1 - e2 * (e1 %/% e2)) we get:
-      # "cannot add bindings to a locked environment"
-      out <- array_expression(
-        "subtract_checked", e1, array_expression(
-          "multiply_checked", e2, array_expression(
-            # this outer cast is to ensure that the result of this and the
-            # result of multiply are the same
-            "cast",
-            array_expression(
-              "cast",
-              array_expression(.binary_function_map[[.Generic]], e1, e2, ...),
-              options = list(to_type = int32(), allow_float_truncate = TRUE)
-            ),
-            options = list(to_type = e2$type, allow_float_truncate = TRUE)
-          )
-        )
-      )
-      return(out)
+      # ^^^ form doesn't work because Ops.Array evaluates eagerly,
+      # but we can build that up
+      quotient <- build_array_expression("%/%", e1, e2)
+      # this cast is to ensure that the result of this and e1 are the same
+      # (autocasting only applies to scalars)
+      base <- cast_array_expression(quotient * e2, e1$type)
+      return(build_array_expression("-", e1, base))
     }
 
     # hack to use subtract instead of subtract_checked for timestamps
@@ -102,14 +92,24 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   expr
 }
 
-.wrap_arrow <- function(arg, fun, type) {
+cast_array_expression <- function(x, to_type, safe = TRUE, ...) {
+  opts <- list(
+    to_type = to_type,
+    allow_int_overflow = !safe,
+    allow_time_truncate = !safe,
+    allow_float_truncate = !safe
+  )
+  array_expression("cast", x, options = modifyList(opts, list(...)))
+}
+
+.wrap_arrow <- function(arg, fun) {
   if (!inherits(arg, c("ArrowObject", "array_expression"))) {
     # TODO: Array$create if lengths are equal?
     # TODO: these kernels should autocast like the dataset ones do (e.g. int vs. float)
     if (fun == "%in%") {
-      arg <- Array$create(arg, type = type)
+      arg <- Array$create(arg)
     } else {
-      arg <- Scalar$create(arg, type = type)
+      arg <- Scalar$create(arg)
     }
   }
   arg
@@ -151,6 +151,16 @@ eval_array_expression <- function(x) {
       a
     }
   })
+  if (length(x$args) == 2L) {
+    # Insert implicit casts
+    if (inherits(x$args[[1]], "Scalar")) {
+      x$args[[1]] <- x$args[[1]]$cast(x$args[[2]]$type)
+    } else if (inherits(x$args[[2]], "Scalar")) {
+      x$args[[2]] <- x$args[[2]]$cast(x$args[[1]]$type)
+    } else if (x$fun == "is_in_meta_binary" && inherits(x$args[[2]], "Array")) {
+      x$args[[2]] <- x$args[[2]]$cast(x$args[[1]]$type)
+    }
+  }
   call_function(x$fun, args = x$args, options = x$options %||% empty_named_list())
 }
 
@@ -208,8 +218,14 @@ print.array_expression <- function(x, ...) {
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
     ToString = function() dataset___expr__ToString(self),
-    cast = function(to_type, ...) {
-      Expression$create("cast", self, options = list(to_type = to_type, ...))
+    cast = function(to_type, safe = TRUE, ...) {
+      opts <- list(
+        to_type = to_type,
+        allow_int_overflow = !safe,
+        allow_time_truncate = !safe,
+        allow_float_truncate = !safe
+      )
+      Expression$create("cast", self, options = modifyList(opts, list(...)))
     }
   )
 )
@@ -258,8 +274,6 @@ build_dataset_expression <- function(.Generic, e1, e2, ...) {
       out <- build_dataset_expression("/", e1, e2)
       return(out$cast(int32(), allow_float_truncate = TRUE))
     } else if (.Generic == "%%") {
-      # TODO: need to do something with types to ensure that e2 is compatible
-      # with e1 %/% e2 and e1.
       return(e1 - e2 * ( e1 %/% e2 ))
     }
 
diff --git a/r/R/scalar.R b/r/R/scalar.R
index 12f29990e0a..774fe571145 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -32,8 +32,14 @@ Scalar <- R6Class("Scalar",
   # TODO: document the methods
   public = list(
     ToString = function() Scalar__ToString(self),
-    cast = function(target_type) {
-      Scalar__CastTo(self, as_type(target_type))
+    cast = function(target_type, safe = TRUE, ...) {
+      opts <- list(
+        to_type = as_type(target_type),
+        allow_int_overflow = !safe,
+        allow_time_truncate = !safe,
+        allow_float_truncate = !safe
+      )
+      call_function("cast", self, options = modifyList(opts, list(...)))
     },
     as_vector = function() Scalar__as_vector(self)
   ),
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 670bee20665..882c9469344 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5539,23 +5539,7 @@ END_CPP11
 }
 #else
 extern "C" SEXP _arrow_Scalar__ToString(SEXP s_sexp){
-	Rf_error("Cannot call Scalar__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
-}
-#endif
-
-// scalar.cpp
-#if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::Scalar> Scalar__CastTo(const std::shared_ptr<arrow::Scalar>& s, const std::shared_ptr<arrow::DataType>& t);
-extern "C" SEXP _arrow_Scalar__CastTo(SEXP s_sexp, SEXP t_sexp){
-BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::Scalar>&>::type s(s_sexp);
-	arrow::r::Input<const std::shared_ptr<arrow::DataType>&>::type t(t_sexp);
-	return cpp11::as_sexp(Scalar__CastTo(s, t));
-END_CPP11
-}
-#else
-extern "C" SEXP _arrow_Scalar__CastTo(SEXP s_sexp, SEXP t_sexp){
-	Rf_error("Cannot call Scalar__CastTo(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+	Rf_error("Cannot call Scalar__ToString(). Please use arrow::install_arrow() to install required runtime libraries. ");
 }
 #endif
 
@@ -6617,7 +6601,6 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, 
 		{ "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, 
 		{ "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, 
-		{ "_arrow_Scalar__CastTo", (DL_FUNC) &_arrow_Scalar__CastTo, 2}, 
 		{ "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, 
 		{ "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, 
 		{ "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, 
diff --git a/r/src/scalar.cpp b/r/src/scalar.cpp
index 2c2d291b5bf..c0cc396b02d 100644
--- a/r/src/scalar.cpp
+++ b/r/src/scalar.cpp
@@ -47,12 +47,6 @@ std::string Scalar__ToString(const std::shared_ptr<arrow::Scalar>& s) {
   return s->ToString();
 }
 
-// [[arrow::export]]
-std::shared_ptr<arrow::Scalar> Scalar__CastTo(const std::shared_ptr<arrow::Scalar>& s,
-                                              const std::shared_ptr<arrow::DataType>& t) {
-  return ValueOrStop(s->CastTo(t));
-}
-
 // [[arrow::export]]
 std::shared_ptr<arrow::Scalar> StructScalar__field(
     const std::shared_ptr<arrow::StructScalar>& s, int i) {
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 7ac5bd9016c..393fdf45455 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -28,7 +28,8 @@ test_that("Addition", {
   # overflow errors — this is slightly different from R's `NA` coercion when
   # overflowing, but better than the alternative of silently restarting
   casted <- a$cast(int8())
-  expect_error(casted + 257)
+  expect_error(casted + 127)
+  expect_error(casted + 200)
 
   skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
   expect_type_equal(a + 4.1, float64())
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 7b127b6e515..fbdf5307cea 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -589,7 +589,20 @@ test_that("filter() with expressions", {
     )
   )
 
-  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %% 2L > 0 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
+      df2[1, c("chr", "dbl", "int")]
+    )
+  )
+
+  skip("Implicit casts aren't being inserted everywhere they need to be")
+  # Error: NotImplemented: Function multiply_checked has no kernel matching input types (scalar[double], array[int32])
   expect_equivalent(
     ds %>%
       select(chr, dbl, int) %>%
@@ -602,6 +615,8 @@ test_that("filter() with expressions", {
     )
   )
 
+  skip("Implicit casts are only inserted for scalars")
+  # Error: NotImplemented: Function add_checked has no kernel matching input types (array[double], array[int32])
   expect_equivalent(
     ds %>%
       select(chr, dbl, int) %>%
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index f98bda0e82a..a80e17c6f3e 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -180,7 +180,6 @@ test_that("filtering with arithmetic", {
     tbl
   )
 
-  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
   expect_dplyr_equal(
     input %>%
       filter(dbl %/% 2 > 3) %>%
@@ -191,7 +190,6 @@ test_that("filtering with arithmetic", {
 })
 
 test_that("filtering with expression + autocasting", {
-  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
   expect_dplyr_equal(
     input %>%
       filter(dbl + 1 > 3L) %>% # test autocasting with comparison to 3L
@@ -243,7 +241,7 @@ test_that("Print method", {
 int: int32
 chr: string
 
-* Filter: and(and(greater(<Array>, 2), or(equal(<Array>, "d"), equal(<Array>, "f"))), less(<Array>, 5L))
+* Filter: and(and(greater(<Array>, 2), or(equal(<Array>, "d"), equal(<Array>, "f"))), less(<Array>, 5))
 See $.data for the source Arrow object',
   fixed = TRUE
   )
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index 1251cf0e7c5..3c100812ff1 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -29,7 +29,7 @@ test_that("array_expression print method", {
   expect_output(
     print(build_array_expression(">", Array$create(1:5), 4)),
     # Not ideal but it is informative
-    "greater(<Array>, 4L)",
+    "greater(<Array>, 4)",
     fixed = TRUE
   )
 })
@@ -84,4 +84,3 @@ test_that("Can create an expression", {
   expect_is(expr, "array_expression")
   expect_equal(eval_array_expression(expr), Array$create(0:4))
 })
-

From 8e9cbf834fc935248aef637b20050f8623e018f1 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 30 Dec 2020 14:16:42 -0800
Subject: [PATCH 6/9] Add jira to skips

---
 r/tests/testthat/test-dataset.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index fbdf5307cea..2aae3bd31bf 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -601,7 +601,7 @@ test_that("filter() with expressions", {
     )
   )
 
-  skip("Implicit casts aren't being inserted everywhere they need to be")
+  skip("Implicit casts aren't being inserted everywhere they need to be (ARROW-11080)")
   # Error: NotImplemented: Function multiply_checked has no kernel matching input types (scalar[double], array[int32])
   expect_equivalent(
     ds %>%
@@ -615,7 +615,7 @@ test_that("filter() with expressions", {
     )
   )
 
-  skip("Implicit casts are only inserted for scalars")
+  skip("Implicit casts are only inserted for scalars (ARROW-11080)")
   # Error: NotImplemented: Function add_checked has no kernel matching input types (array[double], array[int32])
   expect_equivalent(
     ds %>%

From 7e3a2db05e73d5336584ff70ab3d4e78e405e525 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 31 Dec 2020 10:00:57 -0600
Subject: [PATCH 7/9] Remove datetime hackery

---
 r/R/expression.R                      | 6 ------
 r/tests/testthat/test-compute-arith.R | 8 --------
 2 files changed, 14 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index bd944a27a59..9a5e575183d 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -81,12 +81,6 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
       return(build_array_expression("-", e1, base))
     }
 
-    # hack to use subtract instead of subtract_checked for timestamps
-    if (inherits(e1$type, "Timestamp") && inherits(e2$type, "Timestamp") && .Generic == "-"){
-      # don't use the checked variant for timestamp
-      return(array_expression("subtract", e1, e2, ...))
-    }
-
     expr <- array_expression(.binary_function_map[[.Generic]], e1, e2, ...)
   }
   expr
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 393fdf45455..ffde12c4d9b 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -76,11 +76,3 @@ test_that("Dates casting", {
   skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
   expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
 })
-
-test_that("Datetimes", {
-  a <- Array$create(c(Sys.time() + 1:4, NA_integer_))
-  b <- Scalar$create(Sys.time())
-  result <- a - b
-  expect_is(result$type, "DataType")
-  expect_identical(result$type$ToString(), "duration[us]")
-})

From e4ec8321a7d1bd5e758b74c1830fb3941d519952 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 6 Jan 2021 14:44:29 -0800
Subject: [PATCH 8/9] Update JIRAs in skips

---
 r/src/arrowExports.cpp                | 2 +-
 r/tests/testthat/test-compute-arith.R | 4 ++--
 r/tests/testthat/test-dataset.R       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 882c9469344..a66b69d0bab 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5539,7 +5539,7 @@ END_CPP11
 }
 #else
 extern "C" SEXP _arrow_Scalar__ToString(SEXP s_sexp){
-	Rf_error("Cannot call Scalar__ToString(). Please use arrow::install_arrow() to install required runtime libraries. ");
+	Rf_error("Cannot call Scalar__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index ffde12c4d9b..d37367d47c8 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -31,7 +31,7 @@ test_that("Addition", {
   expect_error(casted + 127)
   expect_error(casted + 200)
 
-  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-8919")
   expect_type_equal(a + 4.1, float64())
   expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
 })
@@ -73,6 +73,6 @@ test_that("Division", {
 test_that("Dates casting", {
   a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
 
-  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-8919")
   expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
 })
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 2aae3bd31bf..5bdbc42410e 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -601,7 +601,7 @@ test_that("filter() with expressions", {
     )
   )
 
-  skip("Implicit casts aren't being inserted everywhere they need to be (ARROW-11080)")
+  skip("Implicit casts aren't being inserted everywhere they need to be (ARROW-8919)")
   # Error: NotImplemented: Function multiply_checked has no kernel matching input types (scalar[double], array[int32])
   expect_equivalent(
     ds %>%
@@ -615,7 +615,7 @@ test_that("filter() with expressions", {
     )
   )
 
-  skip("Implicit casts are only inserted for scalars (ARROW-11080)")
+  skip("Implicit casts are only inserted for scalars (ARROW-8919)")
   # Error: NotImplemented: Function add_checked has no kernel matching input types (array[double], array[int32])
   expect_equivalent(
     ds %>%

From a6b2a6f94cc8605afdca26ac844dfbd1b74d4760 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 7 Jan 2021 08:29:30 -0800
Subject: [PATCH 9/9] News

---
 r/NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/r/NEWS.md b/r/NEWS.md
index ebf80ee1d81..a606c03b9cf 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -29,6 +29,7 @@
 
 ## Enhancements
 
+* Arithmetic operations (`+`, `*`, etc.) are supported on Arrays and ChunkedArrays and can be used in filter expressions in Arrow `dplyr` pipelines
 * Table columns can now be added, replaced, or removed by assigning (`<-`) with either `$` or `[[`
 * Column names of Tables and RecordBatches can be renamed by assigning `names()`
 * Large string types can now be written to Parquet files