Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Imports:
purrr,
R6,
rlang,
stats,
tidyselect,
utils,
vctrs
Expand Down
5 changes: 5 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ S3method(match_arrow,ArrowDatum)
S3method(match_arrow,default)
S3method(max,ArrowDatum)
S3method(mean,ArrowDatum)
S3method(median,ArrowDatum)
S3method(min,ArrowDatum)
S3method(names,Dataset)
S3method(names,FeatherReader)
Expand All @@ -73,6 +74,7 @@ S3method(print,array_expression)
S3method(print,arrow_dplyr_query)
S3method(print,arrow_info)
S3method(print,arrow_r_metadata)
S3method(quantile,ArrowDatum)
S3method(read_message,InputStream)
S3method(read_message,MessageReader)
S3method(read_message,default)
Expand Down Expand Up @@ -152,6 +154,7 @@ export(ParquetFileWriter)
export(ParquetVersionType)
export(ParquetWriterProperties)
export(Partitioning)
export(QuantileInterpolation)
export(RandomAccessFile)
export(ReadableFile)
export(RecordBatchFileReader)
Expand Down Expand Up @@ -308,6 +311,8 @@ importFrom(rlang,seq2)
importFrom(rlang,set_names)
importFrom(rlang,syms)
importFrom(rlang,warn)
importFrom(stats,median)
importFrom(stats,quantile)
Comment thread
ianmcook marked this conversation as resolved.
importFrom(tidyselect,contains)
importFrom(tidyselect,ends_with)
importFrom(tidyselect,eval_select)
Expand Down
1 change: 1 addition & 0 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

#' @importFrom stats quantile median
#' @importFrom R6 R6Class
#' @importFrom purrr as_mapper map map2 map_chr map_dfr map_int map_lgl keep
#' @importFrom assertthat assert_that is.string
Expand Down
43 changes: 43 additions & 0 deletions r/R/compute.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,49 @@ collect_arrays_from_dots <- function(dots) {
ChunkedArray$create(!!!arrays)
}

#' @export
quantile.ArrowDatum <- function(x,
probs = seq(0, 1, 0.25),
na.rm = FALSE,
type = 7,
interpolation = c("linear", "lower", "higher", "nearest", "midpoint"),
...) {
if (inherits(x, "Scalar")) x <- Array$create(x)
assert_is(probs, c("numeric", "integer"))
assert_that(length(probs) > 0)
assert_that(all(probs >= 0 & probs <= 1))
if (!na.rm && x$null_count > 0) {
stop("Missing values not allowed if 'na.rm' is FALSE", call. = FALSE)
Comment thread
ianmcook marked this conversation as resolved.
}
if (type != 7) {
stop(
"Argument `type` not supported in Arrow. To control the quantile ",
"interpolation algorithm, set argument `interpolation` to one of: ",
"\"linear\" (the default), \"lower\", \"higher\", \"nearest\", or ",
"\"midpoint\".",
call. = FALSE
)
}
interpolation <- QuantileInterpolation[[toupper(match.arg(interpolation))]]
out <- call_function("quantile", x, options = list(q = probs, interpolation = interpolation))
if (length(out) == 0) {
Comment thread
ianmcook marked this conversation as resolved.
# When there are no non-missing values in the data, the Arrow quantile
# function returns an empty Array, but for consistency with the R quantile
# function, we want an Array of NA_real_ with the same length as probs
out <- Array$create(rep(NA_real_, length(probs)))
}
out
}

#' @export
median.ArrowDatum <- function(x, na.rm = FALSE, ...) {
if (!na.rm && x$null_count > 0) {
Scalar$create(NA_real_)
} else {
Scalar$create(quantile(x, probs = 0.5, na.rm = TRUE, ...))
}
}

#' @export
unique.ArrowDatum <- function(x, incomparables = FALSE, ...) {
call_function("unique", x)
Expand Down
6 changes: 6 additions & 0 deletions r/R/enums.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,9 @@ ParquetVersionType <- enum("ParquetVersionType",
MetadataVersion <- enum("MetadataVersion",
V1 = 0L, V2 = 1L, V3 = 2L, V4 = 3L, V5 = 4L
)

#' @export
#' @rdname enums
QuantileInterpolation <- enum("QuantileInterpolation",
LINEAR = 0L, LOWER = 1L, HIGHER = 2L, NEAREST = 3L, MIDPOINT = 4L
)
5 changes: 5 additions & 0 deletions r/man/enums.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions r/src/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,23 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
return out;
}

if (func_name == "quantile") {
using Options = arrow::compute::QuantileOptions;
auto out = std::make_shared<Options>(Options::Defaults());
SEXP q = options["q"];
if (!Rf_isNull(q) && TYPEOF(q) == REALSXP) {
out->q = cpp11::as_cpp<std::vector<double>>(q);
}
SEXP interpolation = options["interpolation"];
if (!Rf_isNull(interpolation) && TYPEOF(interpolation) == INTSXP &&
XLENGTH(interpolation) == 1) {
out->interpolation =
cpp11::as_cpp<enum arrow::compute::QuantileOptions::Interpolation>(
interpolation);
}
return out;
}

if (func_name == "is_in" || func_name == "index_in") {
using Options = arrow::compute::SetLookupOptions;
return std::make_shared<Options>(cpp11::as_cpp<arrow::Datum>(options["value_set"]),
Expand Down
107 changes: 107 additions & 0 deletions r/tests/testthat/test-compute-aggregate.R
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,113 @@ test_that("Edge cases", {
}
})

test_that("quantile.Array and quantile.ChunkedArray", {
a <- Array$create(c(0, 1, 2, 3))
ca <- ChunkedArray$create(c(0, 1), c(2, 3))
probs <- c(0.49, 0.51)
for(ad in list(a, ca)) {
for (type in c(int32(), uint64(), float64())) {
expect_equal(
quantile(ad$cast(type), probs = probs, interpolation = "linear"),
Array$create(c(1.47, 1.53))
)
expect_equal(
quantile(ad$cast(type), probs = probs, interpolation = "lower"),
Array$create(c(1, 1))$cast(type)
)
expect_equal(
quantile(ad$cast(type), probs = probs, interpolation = "higher"),
Array$create(c(2, 2))$cast(type)
)
expect_equal(
quantile(ad$cast(type), probs = probs, interpolation = "nearest"),
Array$create(c(1, 2))$cast(type)
)
expect_equal(
quantile(ad$cast(type), probs = probs, interpolation = "midpoint"),
Array$create(c(1.5, 1.5))
)
}
}
})

test_that("quantile and median NAs, edge cases, and exceptions", {
expect_equal(
quantile(Array$create(c(1, 2)), probs = c(0, 1)),
Array$create(c(1, 2))
)
expect_error(
quantile(Array$create(c(1, 2, NA))),
"Missing values not allowed if 'na.rm' is FALSE"
)
expect_equal(
quantile(Array$create(numeric(0))),
Array$create(rep(NA_real_, 5))
)
expect_equal(
quantile(Array$create(rep(NA_integer_, 3)), na.rm = TRUE),
Array$create(rep(NA_real_, 5))
)
expect_error(
median(Array$create(c(1, 2)), probs = c(.25, .75))
)
expect_equal(
median(Array$create(c(1, 2)), interpolation = "higher"),
Scalar$create(2)
)
expect_equal(
quantile(Scalar$create(0L)),
Array$create(rep(0, 5))
)
expect_equal(
median(Scalar$create(1L)),
Scalar$create(1)
)
expect_error(
quantile(Array$create(1:3), type = 9),
"not supported"
)
})

test_that("median.Array and median.ChunkedArray", {
expect_vector_equal(
median(input),
1:4
)
expect_vector_equal(
median(input),
1:5
)
expect_vector_equal(
median(input),
numeric(0)
)
expect_vector_equal(
median(input, na.rm = FALSE),
c(1, 2, NA)
)
expect_vector_equal(
median(input, na.rm = TRUE),
c(1, 2, NA)
)
expect_vector_equal(
median(input, na.rm = TRUE),
NA_real_
)
expect_vector_equal(
median(input, na.rm = FALSE),
c(1, 2, NA)
)
expect_vector_equal(
median(input, na.rm = TRUE),
c(1, 2, NA)
)
expect_vector_equal(
median(input, na.rm = TRUE),
NA_real_
)
})

test_that("unique.Array", {
a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
expect_equal(unique(a), Array$create(c(1, 4, 3)))
Expand Down