Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Collate:
'chunked-array.R'
'io.R'
'compression.R'
'scalar.R'
'compute.R'
'config.R'
'csv.R'
Expand All @@ -85,7 +86,6 @@ Collate:
'record-batch-writer.R'
'reexports-bit64.R'
'reexports-tidyselect.R'
'scalar.R'
'schema.R'
'struct.R'
'util.R'
15 changes: 15 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,17 @@ S3method(Ops,ChunkedArray)
S3method(Ops,Expression)
S3method(Ops,array_expression)
S3method(all,equal.ArrowObject)
S3method(as.character,Array)
S3method(as.character,ChunkedArray)
S3method(as.character,Scalar)
S3method(as.data.frame,RecordBatch)
S3method(as.data.frame,Table)
S3method(as.double,Array)
S3method(as.double,ChunkedArray)
S3method(as.double,Scalar)
S3method(as.integer,Array)
S3method(as.integer,ChunkedArray)
S3method(as.integer,Scalar)
S3method(as.list,RecordBatch)
S3method(as.list,Table)
S3method(as.raw,Buffer)
Expand Down Expand Up @@ -44,6 +53,9 @@ S3method(length,Array)
S3method(length,ChunkedArray)
S3method(length,Scalar)
S3method(length,Schema)
S3method(mean,Array)
S3method(mean,ChunkedArray)
S3method(mean,Scalar)
S3method(names,Dataset)
S3method(names,RecordBatch)
S3method(names,ScannerBuilder)
Expand All @@ -58,6 +70,9 @@ S3method(read_message,MessageReader)
S3method(read_message,default)
S3method(row.names,RecordBatch)
S3method(row.names,Table)
S3method(sum,Array)
S3method(sum,ChunkedArray)
S3method(sum,Scalar)
S3method(tail,Array)
S3method(tail,ChunkedArray)
S3method(tail,RecordBatch)
Expand Down
9 changes: 9 additions & 0 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -341,3 +341,12 @@ is.Array <- function(x, type = NULL) {
}
is_it
}

#' @export
as.double.Array <- function(x, ...) as.double(as.vector(x), ...)

#' @export
as.integer.Array <- function(x, ...) as.integer(as.vector(x), ...)

#' @export
as.character.Array <- function(x, ...) as.character(as.vector(x), ...)
11 changes: 9 additions & 2 deletions r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowObject,
if (is.integer(i)) {
i <- Array$create(i)
}
# Invalid: Tried executing function with non-value type: ChunkedArray
# so use old methods
shared_ptr(ChunkedArray, call_function("take", self, i))
},
Filter = function(i, keep_na = TRUE) {
Expand Down Expand Up @@ -151,3 +149,12 @@ head.ChunkedArray <- head.Array

#' @export
tail.ChunkedArray <- tail.Array

#' @export
as.double.ChunkedArray <- as.double.Array

#' @export
as.integer.ChunkedArray <- as.integer.Array

#' @export
as.character.ChunkedArray <- as.character.Array
71 changes: 65 additions & 6 deletions r/R/compute.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,73 @@
# under the License.

#' @include array.R
#' @include chunked-array.R
#' @include scalar.R

call_function <- function(function_name, ..., options = list()) {
assert_that(is.string(function_name))
compute__CallFunction(function_name, list(...), options)
}

#' @export
sum.Array <- function(..., na.rm = FALSE) scalar_aggregate("sum", ..., na.rm = na.rm)

#' @export
sum.ChunkedArray <- sum.Array

#' @export
sum.Scalar <- sum.Array

#' @export
mean.Array <- function(..., na.rm = FALSE) scalar_aggregate("mean", ..., na.rm = na.rm)

#' @export
mean.ChunkedArray <- mean.Array

#' @export
mean.Scalar <- mean.Array

min.Array <- function(..., na.rm = FALSE) {
extrema <- scalar_aggregate("minmax", ..., na.rm = na.rm)
# TODO: StructScalar needs field accessor methods in C++: ARROW-9070
Scalar$create(as.vector(extrema)$min)
}

scalar_aggregate <- function(FUN, ..., na.rm = FALSE) {
a <- collect_arrays_from_dots(list(...))
if (!na.rm && a$null_count > 0) {
# Arrow sum/mean function always drops NAs so handle that here
# https://issues.apache.org/jira/browse/ARROW-9054
Scalar$create(NA_integer_, type = a$type)
} else {
if (inherits(a$type, "Boolean")) {
# Bool sum/mean not implemented so cast to int
# https://issues.apache.org/jira/browse/ARROW-9055
a <- a$cast(int8())
}
shared_ptr(Scalar, call_function(FUN, a))
}
}

collect_arrays_from_dots <- function(dots) {
# Given a list that may contain both Arrays and ChunkedArrays,
# return a single ChunkedArray containing all of those chunks
# (may return a regular Array if there is only one element in dots)
assert_that(all(map_lgl(dots, is.Array)))
if (length(dots) == 1) {
return(dots[[1]])
}

arrays <- unlist(lapply(dots, function(x) {
if (inherits(x, "ChunkedArray")) {
x$chunks
} else {
x
}
}))
ChunkedArray$create(!!!arrays)
}

CastOptions <- R6Class("CastOptions", inherit = ArrowObject)

#' Cast options
Expand All @@ -32,12 +93,10 @@ CastOptions <- R6Class("CastOptions", inherit = ArrowObject)
#' @param allow_float_truncate allow float truncate, `!safe` by default
#'
#' @export
cast_options <- function(
safe = TRUE,
allow_int_overflow = !safe,
allow_time_truncate = !safe,
allow_float_truncate = !safe
){
cast_options <- function(safe = TRUE,
allow_int_overflow = !safe,
allow_time_truncate = !safe,
allow_float_truncate = !safe) {
shared_ptr(CastOptions,
compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate)
)
Expand Down
2 changes: 0 additions & 2 deletions r/R/record-batch.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject,
i <- Array$create(i)
}
assert_is(i, "Array")
# Invalid: Tried executing function with non-value type: RecordBatch
# so use old methods
shared_ptr(RecordBatch, call_function("take", self, i))
},
Filter = function(i, keep_na = TRUE) {
Expand Down
10 changes: 10 additions & 0 deletions r/R/scalar.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Scalar <- R6Class("Scalar", inherit = ArrowObject,
),
active = list(
is_valid = function() Scalar__is_valid(self),
null_count = function() sum(!self$is_valid),
type = function() DataType$create(Scalar__type(self))
)
)
Expand All @@ -62,3 +63,12 @@ is.na.Scalar <- function(x) !x$is_valid

#' @export
as.vector.Scalar <- function(x, mode) x$as_vector()

#' @export
as.double.Scalar <- as.double.Array

#' @export
as.integer.Scalar <- as.integer.Array

#' @export
as.character.Scalar <- as.character.Array
2 changes: 0 additions & 2 deletions r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,6 @@ Table <- R6Class("Table", inherit = ArrowObject,
if (is.integer(i)) {
i <- Array$create(i)
}
# Invalid: Tried executing function with non-value type: Table
# so use old methods
shared_ptr(Table, call_function("take", self, i))
},
Filter = function(i, keep_na = TRUE) {
Expand Down
6 changes: 6 additions & 0 deletions r/src/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
return out;
}

if (func_name == "minmax") {
auto out = std::make_shared<arrow::compute::MinMaxOptions>(
arrow::compute::MinMaxOptions::Defaults());
return out;
}

return nullptr;
}

Expand Down
96 changes: 96 additions & 0 deletions r/tests/testthat/test-compute.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,102 @@

context("compute")

test_that("sum.Array", {
ints <- 1:5
a <- Array$create(ints)
expect_is(sum(a), "Scalar")
expect_identical(as.integer(sum(a)), sum(ints))

floats <- c(1.3, 2.4, 3)
f <- Array$create(floats)
expect_identical(as.numeric(sum(f)), sum(floats))

floats <- c(floats, NA)
na <- Array$create(floats)
expect_identical(as.numeric(sum(na)), sum(floats))
expect_is(sum(na, na.rm = TRUE), "Scalar")
expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE))

bools <- c(TRUE, TRUE, FALSE)
b <- Array$create(bools)
expect_identical(as.integer(sum(b)), sum(bools))
})

test_that("sum.ChunkedArray", {
a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
expect_is(sum(a), "Scalar")
expect_true(is.na(as.vector(sum(a))))
expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35)
})

test_that("sum dots", {
a1 <- Array$create(1:4)
a2 <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
expect_identical(as.numeric(sum(a1, a2, na.rm = TRUE)), 45)
})

test_that("sum.Scalar", {
skip("No sum method in arrow for Scalar: ARROW-9056")
s <- Scalar$create(4)
expect_identical(as.numeric(s), as.numeric(sum(s)))
})

test_that("mean.Array", {
ints <- 1:4
a <- Array$create(ints)
expect_is(mean(a), "Scalar")
expect_identical(as.vector(mean(a)), mean(ints))

floats <- c(1.3, 2.4, 3)
f <- Array$create(floats)
expect_identical(as.vector(mean(f)), mean(floats))

floats <- c(floats, NA)
na <- Array$create(floats)
expect_identical(as.vector(mean(na)), mean(floats))
expect_is(mean(na, na.rm = TRUE), "Scalar")
expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE))

bools <- c(TRUE, TRUE, FALSE)
b <- Array$create(bools)
expect_identical(as.vector(mean(b)), mean(bools))
})

test_that("mean.ChunkedArray", {
a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
expect_is(mean(a), "Scalar")
expect_true(is.na(as.vector(mean(a))))
expect_identical(as.vector(mean(a, na.rm = TRUE)), 35/13)
})

test_that("mean.Scalar", {
skip("No mean method in arrow for Scalar: ARROW-9056")
s <- Scalar$create(4)
expect_identical(as.vector(s), mean(s))
})

test_that("Bad input handling of call_function", {
expect_error(call_function("sum", 2, 3), "to_datum: Not implemented for type double")
})

test_that("min/max.Array", {
skip("arrow::MakeArrayFromScalar can't handle struct: ARROW-6604")
ints <- 1:4
a <- Array$create(ints)
expect_is(min(a), "Scalar")
expect_identical(as.vector(min(a)), min(ints))

floats <- c(1.3, 3, 2.4)
f <- Array$create(floats)
expect_identical(as.vector(min(f)), min(floats))

floats <- c(floats, NA)
na <- Array$create(floats)
expect_identical(as.vector(min(na)), min(floats))
expect_is(min(na, na.rm = TRUE), "Scalar")
expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))

bools <- c(TRUE, TRUE, FALSE)
b <- Array$create(bools)
expect_identical(as.vector(min(b)), min(bools))
})