diff --git a/r/NAMESPACE b/r/NAMESPACE index 1510ad89d26..991e384723e 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -300,6 +300,7 @@ importFrom(rlang,"%||%") importFrom(rlang,.data) importFrom(rlang,abort) importFrom(rlang,as_label) +importFrom(rlang,caller_env) importFrom(rlang,dots_n) importFrom(rlang,enexpr) importFrom(rlang,enexprs) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index e557f869325..c263d20f8df 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -19,7 +19,7 @@ #' @importFrom R6 R6Class #' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep #' @importFrom assertthat assert_that is.string -#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr +#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select #' @useDynLib arrow, .registration = TRUE #' @keywords internal diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 038467fcad0..55a28529f85 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -812,6 +812,10 @@ compute___expr__type <- function(x, schema){ .Call(`_arrow_compute___expr__type`, x, schema) } +compute___expr__type_id <- function(x, schema){ + .Call(`_arrow_compute___expr__type_id`, x, schema) +} + ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){ invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level)) } diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index d03db3047fd..61093e203e7 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -80,6 +80,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum, public = list( length = function() ChunkedArray__length(self), + type_id = function() ChunkedArray__type(self)$id, chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index 2d19bd4cb90..de68d2f2c4d 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -86,6 +86,9 @@ arrow_mask <- function(.data) { f_env[[f]] <- fail } + # Assign the schema to the expressions + map(.data$selected_columns, ~(.$schema <- .data$.data$schema)) + # Add the column references and make the mask out <- new_data_mask( new_environment(.data$selected_columns, parent = f_env), diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 7e0eadfdcea..fadd216a30c 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -57,6 +57,30 @@ nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) { Expression$create("cast", x, options = opts) } +nse_funcs$is <- function(object, class2) { + if (is.string(class2)) { + switch(class2, + # for R data types, pass off to is.*() functions + character = nse_funcs$is.character(object), + numeric = nse_funcs$is.numeric(object), + integer = nse_funcs$is.integer(object), + integer64 = nse_funcs$is.integer64(object), + logical = nse_funcs$is.logical(object), + factor = nse_funcs$is.factor(object), + list = nse_funcs$is.list(object), + # for Arrow data types, compare class2 with object$type()$ToString(), + # but first strip off any parameters to only compare the top-level data + # type, and canonicalize class2 + sub("^([^([<]+).*$", "\\1", object$type()$ToString()) == + canonical_type_str(class2) + ) + } else if (inherits(class2, "DataType")) { + object$type() == as_type(class2) + } else { + stop("Second argument to is() is not a string or DataType", call. = FALSE) + } +} + nse_funcs$dictionary_encode <- function(x, null_encoding_behavior = c("mask", "encode")) { behavior <- toupper(match.arg(null_encoding_behavior)) @@ -121,6 +145,57 @@ nse_funcs$as.numeric <- function(x) { Expression$create("cast", x, options = cast_options(to_type = float64())) } +# is.* type functions +nse_funcs$is.character <- function(x) { + x$type_id() %in% Type[c("STRING", "LARGE_STRING")] +} +nse_funcs$is.numeric <- function(x) { + x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", + "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE", + "DECIMAL", "DECIMAL256")] +} +nse_funcs$is.double <- function(x) { + x$type_id() == Type["DOUBLE"] +} +nse_funcs$is.integer <- function(x) { + x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", + "UINT64", "INT64")] +} +nse_funcs$is.integer64 <- function(x) { + x$type_id() == Type["INT64"] +} +nse_funcs$is.logical <- function(x) { + x$type_id() == Type["BOOL"] +} +nse_funcs$is.factor <- function(x) { + x$type_id() == Type["DICTIONARY"] +} +nse_funcs$is.list <- function(x) { + x$type_id() %in% Type[c("LIST", "FIXED_SIZE_LIST", "LARGE_LIST")] +} + +# rlang::is_* type functions +nse_funcs$is_character <- function(x, n = NULL) { + assert_that(is.null(n)) + nse_funcs$is.character(x) +} +nse_funcs$is_double <- function(x, n = NULL, finite = NULL) { + assert_that(is.null(n) && is.null(finite)) + nse_funcs$is.double(x) +} +nse_funcs$is_integer <- function(x, n = NULL) { + assert_that(is.null(n)) + nse_funcs$is.integer(x) +} +nse_funcs$is_list <- function(x, n = NULL) { + assert_that(is.null(n)) + nse_funcs$is.list(x) +} +nse_funcs$is_logical <- function(x, n = NULL) { + assert_that(is.null(n)) + nse_funcs$is.logical(x) +} + # String functions nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) { if (allowNA) { diff --git a/r/R/dplyr-select.R b/r/R/dplyr-select.R index 3730fe63fec..686965a4197 100644 --- a/r/R/dplyr-select.R +++ b/r/R/dplyr-select.R @@ -59,11 +59,16 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL # The code in this function is adapted from the code in dplyr::relocate.data.frame # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829 - check_select_helpers(c(enexprs(...), enexpr(.before), enexpr(.after))) .data <- arrow_dplyr_query(.data) - to_move <- eval_select(expr(c(...)), .data$selected_columns) + # Assign the schema to the expressions + map(.data$selected_columns, ~(.$schema <- .data$.data$schema)) + + # Create a mask for evaluating expressions in tidyselect helpers + mask <- new_environment(.cache$functions, parent = caller_env()) + + to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask) .before <- enquo(.before) .after <- enquo(.after) @@ -73,12 +78,12 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL if (has_before && has_after) { abort("Must supply only one of `.before` and `.after`.") } else if (has_before) { - where <- min(unname(eval_select(.before, .data$selected_columns))) + where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask))) if (!where %in% to_move) { to_move <- c(to_move, where) } } else if (has_after) { - where <- max(unname(eval_select(.after, .data$selected_columns))) + where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask))) if (!where %in% to_move) { to_move <- c(where, to_move) } @@ -117,4 +122,4 @@ check_select_helpers <- function(exprs) { call. = FALSE ) } -} \ No newline at end of file +} diff --git a/r/R/expression.R b/r/R/expression.R index 99d98b6af0a..417a12eeb81 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -75,7 +75,15 @@ Expression <- R6Class("Expression", inherit = ArrowObject, public = list( ToString = function() compute___expr__ToString(self), - type = function(schema) compute___expr__type(self, schema), + schema = NULL, + type = function(schema = self$schema) { + assert_that(!is.null(schema)) + compute___expr__type(self, schema) + }, + type_id = function(schema = self$schema) { + assert_that(!is.null(schema)) + compute___expr__type_id(self, schema) + }, cast = function(to_type, safe = TRUE, ...) { opts <- list( to_type = to_type, diff --git a/r/R/scalar.R b/r/R/scalar.R index 9865315ee56..40e9c65ce71 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -56,6 +56,7 @@ Scalar <- R6Class("Scalar", # TODO: document the methods public = list( ToString = function() Scalar__ToString(self), + type_id = function() Scalar__type(self)$id, as_vector = function() Scalar__as_vector(self), as_array = function() MakeArrayFromScalar(self), Equals = function(other, ...) { diff --git a/r/R/type.R b/r/R/type.R index 04b0a378e10..a22323c4ba1 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -428,6 +428,55 @@ as_type <- function(type, name = "type") { type } +canonical_type_str <- function(type_str) { + # canonicalizes data type strings, converting data type function names and + # aliases to match the strings returned by DataType$ToString() + assert_that(is.string(type_str)) + if (grepl("[([<]", type_str)) { + stop("Cannot interpret string representations of data types that have parameters", call. = FALSE) + } + switch(type_str, + int8 = "int8", + int16 = "int16", + int32 = "int32", + int64 = "int64", + uint8 = "uint8", + uint16 = "uint16", + uint32 = "uint32", + uint64 = "uint64", + float16 = "halffloat", + halffloat = "halffloat", + float32 = "float", + float = "float", + float64 = "double", + double = "double", + boolean = "bool", + bool = "bool", + utf8 = "string", + large_utf8 = "large_string", + large_string = "large_string", + binary = "binary", + large_binary = "large_binary", + fixed_size_binary = "fixed_size_binary", + string = "string", + date32 = "date32", + date64 = "date64", + time32 = "time32", + time64 = "time64", + null = "null", + timestamp = "timestamp", + decimal = "decimal128", + struct = "struct", + list_of = "list", + list = "list", + large_list_of = "large_list", + large_list = "large_list", + fixed_size_list_of = "fixed_size_list", + fixed_size_list = "fixed_size_list", + stop("Unrecognized string representation of data type", call. = FALSE) + ) +} + # vctrs support ----------------------------------------------------------- str_dup <- function(x, times) { paste0(rep(x, times = times), collapse = "") diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 0a9b8394e4b..b7ca5e9414c 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -3145,6 +3145,22 @@ extern "C" SEXP _arrow_compute___expr__type(SEXP x_sexp, SEXP schema_sexp){ } #endif +// expression.cpp +#if defined(ARROW_R_WITH_ARROW) +arrow::Type::type compute___expr__type_id(const std::shared_ptr& x, const std::shared_ptr& schema); +extern "C" SEXP _arrow_compute___expr__type_id(SEXP x_sexp, SEXP schema_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type x(x_sexp); + arrow::r::Input&>::type schema(schema_sexp); + return cpp11::as_sexp(compute___expr__type_id(x, schema)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_compute___expr__type_id(SEXP x_sexp, SEXP schema_sexp){ + Rf_error("Cannot call compute___expr__type_id(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // feather.cpp #if defined(ARROW_R_WITH_ARROW) void ipc___WriteFeather__Table(const std::shared_ptr& stream, const std::shared_ptr& table, int version, int chunk_size, arrow::Compression::type compression, int compression_level); @@ -7074,6 +7090,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, { "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, { "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, + { "_arrow_compute___expr__type_id", (DL_FUNC) &_arrow_compute___expr__type_id, 2}, { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, diff --git a/r/src/expression.cpp b/r/src/expression.cpp index d8745ade479..4b671cb99dd 100644 --- a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -76,4 +76,11 @@ std::shared_ptr compute___expr__type( return bound.type(); } +// [[arrow::export]] +arrow::Type::type compute___expr__type_id(const std::shared_ptr& x, + const std::shared_ptr& schema) { + auto bound = ValueOrStop(x->Bind(*schema)); + return bound.type()->id(); +} + #endif diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index bf5f06b038c..378640e8308 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -343,21 +343,22 @@ test_that("relocate", { }) test_that("relocate with selection helpers", { + df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") expect_dplyr_equal( input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(), df ) - expect_error( - df %>% Table$create() %>% relocate(where(is.character)), - "Unsupported selection helper" + expect_dplyr_equal( + input %>% relocate(where(is.character)) %>% collect(), + df ) - expect_error( - df %>% Table$create() %>% relocate(a, b, c, .after = where(is.character)), - "Unsupported selection helper" + expect_dplyr_equal( + input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(), + df ) - expect_error( - df %>% Table$create() %>% relocate(d, e, f, .before = where(is.numeric)), - "Unsupported selection helper" + expect_dplyr_equal( + input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(), + df ) }) @@ -524,6 +525,280 @@ test_that("is.finite(), is.infinite(), is.nan()", { ) }) +test_that("type checks with is() giving Arrow types", { + # with class2=DataType + expect_equal( + Table$create( + i32 = Array$create(1, int32()), + dec = Array$create(pi)$cast(decimal(3, 2)), + f64 = Array$create(1.1, float64()), + str = Array$create("a", arrow::string()) + ) %>% transmute( + i32_is_i32 = is(i32, int32()), + i32_is_dec = is(i32, decimal(3, 2)), + i32_is_i64 = is(i32, float64()), + i32_is_str = is(i32, arrow::string()), + dec_is_i32 = is(dec, int32()), + dec_is_dec = is(dec, decimal(3, 2)), + dec_is_i64 = is(dec, float64()), + dec_is_str = is(dec, arrow::string()), + f64_is_i32 = is(f64, int32()), + f64_is_dec = is(f64, decimal(3, 2)), + f64_is_i64 = is(f64, float64()), + f64_is_str = is(f64, arrow::string()), + str_is_i32 = is(str, int32()), + str_is_dec = is(str, decimal(3, 2)), + str_is_i64 = is(str, float64()), + str_is_str = is(str, arrow::string()) + ) %>% + collect() %>% t() %>% as.vector(), + c(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, + FALSE, FALSE, FALSE, FALSE, TRUE) + ) + # with class2=string + expect_equal( + Table$create( + i32 = Array$create(1, int32()), + f64 = Array$create(1.1, float64()), + str = Array$create("a", arrow::string()) + ) %>% transmute( + i32_is_i32 = is(i32, "int32"), + i32_is_i64 = is(i32, "double"), + i32_is_str = is(i32, "string"), + f64_is_i32 = is(f64, "int32"), + f64_is_i64 = is(f64, "double"), + f64_is_str = is(f64, "string"), + str_is_i32 = is(str, "int32"), + str_is_i64 = is(str, "double"), + str_is_str = is(str, "string") + ) %>% + collect() %>% t() %>% as.vector(), + c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) + ) + # with class2=string alias + expect_equal( + Table$create( + f16 = Array$create(NA_real_, halffloat()), + f32 = Array$create(1.1, float()), + f64 = Array$create(2.2, float64()), + lgl = Array$create(TRUE, bool()), + str = Array$create("a", arrow::string()) + ) %>% transmute( + f16_is_f16 = is(f16, "float16"), + f16_is_f32 = is(f16, "float32"), + f16_is_f64 = is(f16, "float64"), + f16_is_lgl = is(f16, "boolean"), + f16_is_str = is(f16, "utf8"), + f32_is_f16 = is(f32, "float16"), + f32_is_f32 = is(f32, "float32"), + f32_is_f64 = is(f32, "float64"), + f32_is_lgl = is(f32, "boolean"), + f32_is_str = is(f32, "utf8"), + f64_is_f16 = is(f64, "float16"), + f64_is_f32 = is(f64, "float32"), + f64_is_f64 = is(f64, "float64"), + f64_is_lgl = is(f64, "boolean"), + f64_is_str = is(f64, "utf8"), + lgl_is_f16 = is(lgl, "float16"), + lgl_is_f32 = is(lgl, "float32"), + lgl_is_f64 = is(lgl, "float64"), + lgl_is_lgl = is(lgl, "boolean"), + lgl_is_str = is(lgl, "utf8"), + str_is_f16 = is(str, "float16"), + str_is_f32 = is(str, "float32"), + str_is_f64 = is(str, "float64"), + str_is_lgl = is(str, "boolean"), + str_is_str = is(str, "utf8") + ) %>% + collect() %>% t() %>% as.vector(), + c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, + FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, + FALSE, FALSE, TRUE) + ) +}) + +test_that("type checks with is() giving R types", { + library(bit64) + expect_dplyr_equal( + input %>% + transmute( + chr_is_chr = is(chr, "character"), + chr_is_fct = is(chr, "factor"), + chr_is_int = is(chr, "integer"), + chr_is_i64 = is(chr, "integer64"), + chr_is_lst = is(chr, "list"), + chr_is_lgl = is(chr, "logical"), + chr_is_num = is(chr, "numeric"), + dbl_is_chr = is(dbl, "character"), + dbl_is_fct = is(dbl, "factor"), + dbl_is_int = is(dbl, "integer"), + dbl_is_i64 = is(dbl, "integer64"), + dbl_is_lst = is(dbl, "list"), + dbl_is_lgl = is(dbl, "logical"), + dbl_is_num = is(dbl, "numeric"), + fct_is_chr = is(fct, "character"), + fct_is_fct = is(fct, "factor"), + fct_is_int = is(fct, "integer"), + fct_is_i64 = is(fct, "integer64"), + fct_is_lst = is(fct, "list"), + fct_is_lgl = is(fct, "logical"), + fct_is_num = is(fct, "numeric"), + int_is_chr = is(int, "character"), + int_is_fct = is(int, "factor"), + int_is_int = is(int, "integer"), + int_is_i64 = is(int, "integer64"), + int_is_lst = is(int, "list"), + int_is_lgl = is(int, "logical"), + int_is_num = is(int, "numeric"), + lgl_is_chr = is(lgl, "character"), + lgl_is_fct = is(lgl, "factor"), + lgl_is_int = is(lgl, "integer"), + lgl_is_i64 = is(lgl, "integer64"), + lgl_is_lst = is(lgl, "list"), + lgl_is_lgl = is(lgl, "logical"), + lgl_is_num = is(lgl, "numeric") + ) %>% + collect(), + tbl + ) + expect_dplyr_equal( + input %>% + transmute( + i64_is_chr = is(i64, "character"), + i64_is_fct = is(i64, "factor"), + # we want Arrow to return TRUE, but bit64 returns FALSE + #i64_is_int = is(i64, "integer"), + i64_is_i64 = is(i64, "integer64"), + i64_is_lst = is(i64, "list"), + i64_is_lgl = is(i64, "logical"), + # we want Arrow to return TRUE, but bit64 returns FALSE + #i64_is_num = is(i64, "numeric"), + lst_is_chr = is(lst, "character"), + lst_is_fct = is(lst, "factor"), + lst_is_int = is(lst, "integer"), + lst_is_i64 = is(lst, "integer64"), + lst_is_lst = is(lst, "list"), + lst_is_lgl = is(lst, "logical"), + lst_is_num = is(lst, "numeric") + ) %>% + collect(), + tibble( + i64 = as.integer64(1:3), + lst = list(c("a", "b"), c("d", "e"), c("f", "g")) + ) + ) +}) + +test_that("type checks with is.*()", { + library(bit64) + expect_dplyr_equal( + input %>% + transmute( + chr_is_chr = is.character(chr), + chr_is_dbl = is.double(chr), + chr_is_fct = is.factor(chr), + chr_is_int = is.integer(chr), + chr_is_i64 = is.integer64(chr), + chr_is_lst = is.list(chr), + chr_is_lgl = is.logical(chr), + chr_is_num = is.numeric(chr), + dbl_is_chr = is.character(dbl), + dbl_is_dbl = is.double(dbl), + dbl_is_fct = is.factor(dbl), + dbl_is_int = is.integer(dbl), + dbl_is_i64 = is.integer64(dbl), + dbl_is_lst = is.list(dbl), + dbl_is_lgl = is.logical(dbl), + dbl_is_num = is.numeric(dbl), + fct_is_chr = is.character(fct), + fct_is_dbl = is.double(fct), + fct_is_fct = is.factor(fct), + fct_is_int = is.integer(fct), + fct_is_i64 = is.integer64(fct), + fct_is_lst = is.list(fct), + fct_is_lgl = is.logical(fct), + fct_is_num = is.numeric(fct), + int_is_chr = is.character(int), + int_is_dbl = is.double(int), + int_is_fct = is.factor(int), + int_is_int = is.integer(int), + int_is_i64 = is.integer64(int), + int_is_lst = is.list(int), + int_is_lgl = is.logical(int), + int_is_num = is.numeric(int), + lgl_is_chr = is.character(lgl), + lgl_is_dbl = is.double(lgl), + lgl_is_fct = is.factor(lgl), + lgl_is_int = is.integer(lgl), + lgl_is_i64 = is.integer64(lgl), + lgl_is_lst = is.list(lgl), + lgl_is_lgl = is.logical(lgl), + lgl_is_num = is.numeric(lgl) + ) %>% + collect(), + tbl + ) + expect_dplyr_equal( + input %>% + transmute( + i64_is_chr = is.character(i64), + # TODO: investigate why this is not matching when testthat runs it + #i64_is_dbl = is.double(i64), + i64_is_fct = is.factor(i64), + # we want Arrow to return TRUE, but bit64 returns FALSE + #i64_is_int = is.integer(i64), + i64_is_i64 = is.integer64(i64), + i64_is_lst = is.list(i64), + i64_is_lgl = is.logical(i64), + i64_is_num = is.numeric(i64), + lst_is_chr = is.character(lst), + lst_is_dbl = is.double(lst), + lst_is_fct = is.factor(lst), + lst_is_int = is.integer(lst), + lst_is_i64 = is.integer64(lst), + lst_is_lst = is.list(lst), + lst_is_lgl = is.logical(lst), + lst_is_num = is.numeric(lst) + ) %>% + collect(), + tibble( + i64 = as.integer64(1:3), + lst = list(c("a", "b"), c("d", "e"), c("f", "g")) + ) + ) +}) + +test_that("type checks with is_*()", { + library(rlang) + expect_dplyr_equal( + input %>% + transmute( + chr_is_chr = is_character(chr), + chr_is_dbl = is_double(chr), + chr_is_int = is_integer(chr), + chr_is_lst = is_list(chr), + chr_is_lgl = is_logical(chr), + dbl_is_chr = is_character(dbl), + dbl_is_dbl = is_double(dbl), + dbl_is_int = is_integer(dbl), + dbl_is_lst = is_list(dbl), + dbl_is_lgl = is_logical(dbl), + int_is_chr = is_character(int), + int_is_dbl = is_double(int), + int_is_int = is_integer(int), + int_is_lst = is_list(int), + int_is_lgl = is_logical(int), + lgl_is_chr = is_character(lgl), + lgl_is_dbl = is_double(lgl), + lgl_is_int = is_integer(lgl), + lgl_is_lst = is_list(lgl), + lgl_is_lgl = is_logical(lgl) + ) %>% + collect(), + tbl + ) +}) + test_that("as.factor()/dictionary_encode()", { skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}") df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B")) diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R index d0459fde5b5..49babf30d5c 100644 --- a/r/tests/testthat/test-expression.R +++ b/r/tests/testthat/test-expression.R @@ -58,4 +58,4 @@ test_that("C++ expressions", { ) # Interprets that as a list type expect_r6_class(f == c(1L, 2L), "Expression") -}) \ No newline at end of file +}) diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 56cef722556..a3118be2a2c 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -106,3 +106,108 @@ test_that("Masked data type functions still work", { rm(type) }) + +test_that("Type strings are correctly canonicalized", { + # data types without arguments + expect_equal(canonical_type_str("int8"), int8()$ToString()) + expect_equal(canonical_type_str("int16"), int16()$ToString()) + expect_equal(canonical_type_str("int32"), int32()$ToString()) + expect_equal(canonical_type_str("int64"), int64()$ToString()) + expect_equal(canonical_type_str("uint8"), uint8()$ToString()) + expect_equal(canonical_type_str("uint16"), uint16()$ToString()) + expect_equal(canonical_type_str("uint32"), uint32()$ToString()) + expect_equal(canonical_type_str("uint64"), uint64()$ToString()) + expect_equal(canonical_type_str("float16"), float16()$ToString()) + expect_equal(canonical_type_str("halffloat"), halffloat()$ToString()) + expect_equal(canonical_type_str("float32"), float32()$ToString()) + expect_equal(canonical_type_str("float"), float()$ToString()) + expect_equal(canonical_type_str("float64"), float64()$ToString()) + expect_equal(canonical_type_str("double"), float64()$ToString()) + expect_equal(canonical_type_str("boolean"), boolean()$ToString()) + expect_equal(canonical_type_str("bool"), bool()$ToString()) + expect_equal(canonical_type_str("utf8"), utf8()$ToString()) + expect_equal(canonical_type_str("large_utf8"), large_utf8()$ToString()) + expect_equal(canonical_type_str("large_string"), large_utf8()$ToString()) + expect_equal(canonical_type_str("binary"), binary()$ToString()) + expect_equal(canonical_type_str("large_binary"), large_binary()$ToString()) + expect_equal(canonical_type_str("string"), arrow::string()$ToString()) + expect_equal(canonical_type_str("null"), null()$ToString()) + + # data types with arguments + expect_equal( + canonical_type_str("fixed_size_binary"), + sub("^([^([<]+).*$", "\\1", fixed_size_binary(42)$ToString()) + ) + expect_equal( + canonical_type_str("date32"), + sub("^([^([<]+).*$", "\\1", date32()$ToString()) + ) + expect_equal( + canonical_type_str("date64"), + sub("^([^([<]+).*$", "\\1", date64()$ToString()) + ) + expect_equal( + canonical_type_str("time32"), + sub("^([^([<]+).*$", "\\1", time32()$ToString()) + ) + expect_equal( + canonical_type_str("time64"), + sub("^([^([<]+).*$", "\\1", time64()$ToString()) + ) + expect_equal( + canonical_type_str("timestamp"), + sub("^([^([<]+).*$", "\\1", timestamp()$ToString()) + ) + expect_equal( + canonical_type_str("decimal"), + sub("^([^([<]+).*$", "\\1", decimal(3,2)$ToString()) + ) + expect_equal( + canonical_type_str("struct"), + sub("^([^([<]+).*$", "\\1", struct(foo = int32())$ToString()) + ) + expect_equal( + canonical_type_str("list_of"), + sub("^([^([<]+).*$", "\\1", list_of(int32())$ToString()) + ) + expect_equal( + canonical_type_str("list"), + sub("^([^([<]+).*$", "\\1", list_of(int32())$ToString()) + ) + expect_equal( + canonical_type_str("large_list_of"), + sub("^([^([<]+).*$", "\\1", large_list_of(int32())$ToString()) + ) + expect_equal( + canonical_type_str("large_list"), + sub("^([^([<]+).*$", "\\1", large_list_of(int32())$ToString()) + ) + expect_equal( + canonical_type_str("fixed_size_list_of"), + sub("^([^([<]+).*$", "\\1", fixed_size_list_of(int32(), 42)$ToString()) + ) + expect_equal( + canonical_type_str("fixed_size_list"), + sub("^([^([<]+).*$", "\\1", fixed_size_list_of(int32(), 42)$ToString()) + ) + + # unsupported data types + expect_error( + canonical_type_str("decimal128(3, 2)"), + "parameters" + ) + expect_error( + canonical_type_str("list"), + "parameters" + ) + expect_error( + canonical_type_str("time32[s]"), + "parameters" + ) + + # unrecognized data types + expect_error( + canonical_type_str("foo"), + "Unrecognized" + ) +})