From c902f2fc48e1262a8b3569ba3394f06b247ff2f5 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 7 Apr 2021 18:02:52 -0400 Subject: [PATCH 1/3] Translate as.*() functions for simple R types plus bit64::as.integer64() --- r/R/dplyr.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 7c2ef4889ad..10dc669f6d4 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -396,6 +396,24 @@ build_function_list <- function(FUN) { # Include mappings from R function name spellings lapply(set_names(names(.array_function_map)), wrapper), # Plus some special handling where it's not 1:1 + as.character = function(x) { + FUN("cast", x, options = cast_options(to_type = string())) + }, + as.double = function(x) { + FUN("cast", x, options = cast_options(to_type = float64())) + }, + as.integer = function(x) { + FUN("cast", x, options = cast_options(to_type = int32())) + }, + as.integer64 = function(x) { + FUN("cast", x, options = cast_options(to_type = int64())) + }, + as.logical = function(x) { + FUN("cast", x, options = cast_options(to_type = boolean())) + }, + as.numeric = function(x) { + FUN("cast", x, options = cast_options(to_type = float64())) + }, nchar = function(x, type = "chars", allowNA = FALSE, keepNA = NA) { if (allowNA) { stop("allowNA = TRUE not supported for Arrow", call. = FALSE) From 84e39485081e59b1f0c941e363009673115acfad Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 7 Apr 2021 18:15:56 -0400 Subject: [PATCH 2/3] Allow truncation in conversion to integer types --- r/R/dplyr.R | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 10dc669f6d4..463f0884ae9 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -403,10 +403,26 @@ build_function_list <- function(FUN) { FUN("cast", x, options = cast_options(to_type = float64())) }, as.integer = function(x) { - FUN("cast", x, options = cast_options(to_type = int32())) + FUN( + "cast", + x, + options = cast_options( + to_type = int32(), + allow_float_truncate = TRUE, + allow_decimal_truncate = TRUE + ) + ) }, as.integer64 = function(x) { - FUN("cast", x, options = cast_options(to_type = int64())) + FUN( + "cast", + x, + options = cast_options( + to_type = int64(), + allow_float_truncate = TRUE, + allow_decimal_truncate = TRUE + ) + ) }, as.logical = function(x) { FUN("cast", x, options = cast_options(to_type = boolean())) From 1604c601e73140295816b8db735caee9a9a026e7 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 7 Apr 2021 18:54:50 -0400 Subject: [PATCH 3/3] Add tests --- r/tests/testthat/test-dplyr.R | 102 ++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 69f0b5827fc..b8ebc357d60 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -316,3 +316,105 @@ test_that("relocate with selection helpers", { "Unsupported selection helper" ) }) + +test_that("explicit type conversions", { + library(bit64) + expect_dplyr_equal( + input %>% + transmute( + int2chr = as.character(int), + int2dbl = as.double(int), + int2int = as.integer(int), + int2num = as.numeric(int), + dbl2chr = as.character(dbl), + dbl2dbl = as.double(dbl), + dbl2int = as.integer(dbl), + dbl2num = as.numeric(dbl), + ) %>% + collect(), + tbl + ) + expect_dplyr_equal( + input %>% + transmute( + chr2chr = as.character(chr), + chr2dbl = as.double(chr), + chr2int = as.integer(chr), + chr2num = as.numeric(chr) + ) %>% + collect(), + tibble(chr = c("1", "2", "3")) + ) + expect_dplyr_equal( + input %>% + transmute( + chr2i64 = as.integer64(chr), + dbl2i64 = as.integer64(dbl), + i642i64 = as.integer64(i64), + ) %>% + collect(), + tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10)) + ) + expect_dplyr_equal( + input %>% + transmute( + chr2lgl = as.logical(chr), + dbl2lgl = as.logical(dbl), + int2lgl = as.logical(int) + ) %>% + collect(), + tibble( + chr = c("TRUE", "FALSE", "true", "false"), + dbl = c(1, 0, -99, 0), + int = c(1L, 0L, -99L, 0L) + ) + ) + expect_dplyr_equal( + input %>% + transmute( + dbl2chr = as.character(dbl), + dbl2dbl = as.double(dbl), + dbl2int = as.integer(dbl), + dbl2lgl = as.logical(dbl), + int2chr = as.character(int), + int2dbl = as.double(int), + int2int = as.integer(int), + int2lgl = as.logical(int), + lgl2chr = toupper(as.character(lgl)), # Arrow returns "true", "false" + lgl2dbl = as.double(lgl), + lgl2int = as.integer(lgl), + lgl2lgl = as.logical(lgl), + ) %>% + collect(), + tibble( + dbl = c(1, 0, NA_real_), + int = c(1L, 0L, NA_integer_), + lgl = c(TRUE, FALSE, NA) + ) + ) +}) + +test_that("bad explicit type conversions", { + + # Arrow returns lowercase "true", "false" + expect_error( + expect_dplyr_equal( + input %>% + transmute(lgl2chr = as.character(lgl)) %>% + collect(), + tibble(lgl = c(TRUE, FALSE, NA) + ) + ) + ) + + # Arrow fails to parse these strings as Booleans + expect_error( + expect_dplyr_equal( + input %>% + transmute(chr2lgl = as.logical(chr)) %>% + collect(), + tibble(chr = c("TRU", "FAX", "")) + ) + ) + +})