Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ Suggests:
rmarkdown,
stringr,
testthat,
tibble
tibble,
withr
LinkingTo: cpp11 (>= 0.2.0)
Collate:
'enums.R'
Expand Down
7 changes: 6 additions & 1 deletion r/R/arrow-datum.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ is.na.ArrowDatum <- function(x) call_function("is_null", x)
is.nan.ArrowDatum <- function(x) call_function("is_nan", x)

#' @export
as.vector.ArrowDatum <- function(x, mode) x$as_vector()
as.vector.ArrowDatum <- function(x, mode) {
tryCatch(
x$as_vector(),
error = handle_embedded_nul_error
)
}

filter_rows <- function(x, i, keep_na = TRUE, ...) {
# General purpose function for [ row subsetting with R semantics
Expand Down
5 changes: 4 additions & 1 deletion r/R/arrow-tabular.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ ArrowTabular <- R6Class("ArrowTabular", inherit = ArrowObject,

#' @export
as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) {
df <- x$to_data_frame()
tryCatch(
df <- x$to_data_frame(),
error = handle_embedded_nul_error
)
if (!is.null(r_metadata <- x$metadata$r)) {
df <- apply_arrow_r_metadata(df, .unserialize_arrow_r_metadata(r_metadata))
}
Expand Down
8 changes: 8 additions & 0 deletions r/R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,11 @@ all_names <- function(expr) {
is_constant <- function(expr) {
length(all_vars(expr)) == 0
}

handle_embedded_nul_error <- function(e) {
msg <- conditionMessage(e)
if (grepl(" nul ", msg)) {
e$message <- paste0(msg, "; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)")
}
stop(e)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the call info in the error message at all informative? If not, you might want stop(msg, call. = FALSE) inside the conditional (after appending to the message of course).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought (though I could be remembering wrong) that when you call stop() on a caught simpleError class object, you just re-raise it and that preserves however the original source of the stop() had said call. or whatever options. I'll review/confirm tomorrow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirmed:

> f <- function(...) stop(...)
> f("boo")
Error in f("boo") : boo
> f("boo", call. = FALSE)
Error: boo
> tryCatch(f("boo"), error = function(e) stop(e))
Error in f("boo") : boo
> tryCatch(f("boo", call. = FALSE), error = function(e) stop(e))
Error: boo

9 changes: 6 additions & 3 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,14 @@ class Converter {
// for each array, add a task to the task group
//
// The task group is Finish() in the caller
void IngestParallel(SEXP data, const std::shared_ptr<arrow::internal::TaskGroup>& tg) {
// The converter itself is passed as `self` so that if one of the parallel ops
// hits `stop()`, we don't bail before `tg` is destroyed, which would cause a crash
void IngestParallel(SEXP data, const std::shared_ptr<arrow::internal::TaskGroup>& tg,
std::shared_ptr<Converter> self) {
R_xlen_t k = 0, i = 0;
for (const auto& array : arrays_) {
auto n_chunk = array->length();
tg->Append([=] { return IngestOne(data, array, k, n_chunk, i); });
tg->Append([=] { return self->IngestOne(data, array, k, n_chunk, i); });
k += n_chunk;
i++;
}
Expand Down Expand Up @@ -1242,7 +1245,7 @@ cpp11::writable::list to_dataframe_parallel(

// add a task to ingest data of that column if that can be done in parallel
if (converters[i]->Parallel()) {
converters[i]->IngestParallel(column, tg);
converters[i]->IngestParallel(column, tg, converters[i]);
}
}

Expand Down
4 changes: 1 addition & 3 deletions r/tests/testthat/helper-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,5 @@ test_that <- function(what, code) {
# Wrapper to run tests that only touch R code even when the C++ library isn't
# available (so that at least some tests are run on those platforms)
r_only <- function(code) {
old <- options(..skip.tests = FALSE)
on.exit(options(old))
code
withr::with_options(list(..skip.tests = FALSE), code)
}
65 changes: 37 additions & 28 deletions r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -262,17 +262,16 @@ test_that("array supports POSIXct (ARROW-3340)", {

test_that("array supports POSIXct without timezone", {
# Make sure timezone is not set
tz <- Sys.getenv("TZ")
Sys.setenv(TZ = "")
on.exit(Sys.setenv(TZ = tz))
times <- strptime("2019-02-03 12:34:56", format="%Y-%m-%d %H:%M:%S") + 1:10
expect_array_roundtrip(times, timestamp("us", ""))
withr::with_envvar(c(TZ = ""), {
times <- strptime("2019-02-03 12:34:56", format="%Y-%m-%d %H:%M:%S") + 1:10
expect_array_roundtrip(times, timestamp("us", ""))

# Also test the INTSXP code path
skip("Ingest_POSIXct only implemented for REALSXP")
times_int <- as.integer(times)
attributes(times_int) <- attributes(times)
expect_array_roundtrip(times_int, timestamp("us", ""))
# Also test the INTSXP code path
skip("Ingest_POSIXct only implemented for REALSXP")
times_int <- as.integer(times)
attributes(times_int) <- attributes(times)
expect_array_roundtrip(times_int, timestamp("us", ""))
})
})

test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", {
Expand Down Expand Up @@ -634,18 +633,28 @@ test_that("Handling string data with embedded nuls", {
as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
as.raw(c(0x74, 0x76))),
class = c("arrow_binary", "vctrs_vctr", "list"))
expect_error(rawToChar(raws[[3]]), "nul") # See?
expect_error(
rawToChar(raws[[3]]),
"embedded nul in string: 'ma\\0n'", # See?
fixed = TRUE
)
array_with_nul <- Array$create(raws)$cast(utf8())
expect_error(as.vector(array_with_nul), "nul")

options(arrow.skip_nul = TRUE)
expect_warning(
expect_identical(
as.vector(array_with_nul),
c("person", "woman", "man", "fan", "camera", "tv")
),
"Stripping '\\\\0' \\(nul\\) from character vector"
expect_error(
as.vector(array_with_nul),
"embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
fixed = TRUE
)

withr::with_options(list(arrow.skip_nul = TRUE), {
expect_warning(
expect_identical(
as.vector(array_with_nul),
c("person", "woman", "man", "fan", "camera", "tv")
),
"Stripping '\\0' (nul) from character vector",
fixed = TRUE
)
})
})

test_that("Array$create() should have helpful error", {
Expand Down Expand Up @@ -793,14 +802,14 @@ test_that("Array$ApproxEquals", {
})

test_that("auto int64 conversion to int can be disabled (ARROW-10093)", {
op <- options(arrow.int64_downcast = FALSE); on.exit(options(op))

a <- Array$create(1:10, int64())
expect_true(inherits(a$as_vector(), "integer64"))
withr::with_options(list(arrow.int64_downcast = FALSE), {
a <- Array$create(1:10, int64())
expect_true(inherits(a$as_vector(), "integer64"))

batch <- RecordBatch$create(x = a)
expect_true(inherits(as.data.frame(batch)$x, "integer64"))
batch <- RecordBatch$create(x = a)
expect_true(inherits(as.data.frame(batch)$x, "integer64"))

tab <- Table$create(x = a)
expect_true(inherits(as.data.frame(batch)$x, "integer64"))
tab <- Table$create(x = a)
expect_true(inherits(as.data.frame(batch)$x, "integer64"))
})
})
28 changes: 28 additions & 0 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,31 @@ test_that("record_batch() with different length arrays", {
expect_error(record_batch(a=1:5, b = 42), msg)
expect_error(record_batch(a=1:5, b = 1:6), msg)
})

test_that("Handling string data with embedded nuls", {
raws <- structure(list(
as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
as.raw(c(0x74, 0x76))),
class = c("arrow_binary", "vctrs_vctr", "list"))
batch_with_nul <- record_batch(a = 1:5, b = raws)
batch_with_nul$b <- batch_with_nul$b$cast(utf8())
expect_error(
as.data.frame(batch_with_nul),
"embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
fixed = TRUE
)

withr::with_options(list(arrow.skip_nul = TRUE), {
expect_warning(
expect_equivalent(
as.data.frame(batch_with_nul)$b,
c("person", "woman", "man", "camera", "tv")
),
"Stripping '\\0' (nul) from character vector",
fixed = TRUE
)
})
})
28 changes: 28 additions & 0 deletions r/tests/testthat/test-chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -383,3 +383,31 @@ test_that("Converting a chunked array unifies factors (ARROW-8374)", {

expect_identical(ca$as_vector(), res)
})

test_that("Handling string data with embedded nuls", {
raws <- structure(list(
as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
as.raw(c(0x74, 0x76))),
class = c("arrow_binary", "vctrs_vctr", "list"))
chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8())
expect_error(
as.vector(chunked_array_with_nul),
"embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
fixed = TRUE
)

withr::with_options(list(arrow.skip_nul = TRUE), {
expect_warning(
expect_identical(
as.vector(chunked_array_with_nul),
c("person", "woman", "man", "fan", "camera", "tv")
),
"Stripping '\\0' (nul) from character vector",
fixed = TRUE
)
})
})
18 changes: 9 additions & 9 deletions r/tests/testthat/test-install-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ r_only({
ours <- "https://dl.example.com/ursalabs/fake_repo"
other <- "https://cran.fiocruz.br/"

old <- options(
opts <- list(
repos=c(CRAN = "@CRAN@"), # Restore defaul
arrow.dev_repo = ours
)
on.exit(options(old))

expect_identical(arrow_repos(), cran)
expect_identical(arrow_repos(c(cran, ours)), cran)
expect_identical(arrow_repos(c(ours, other)), other)
expect_identical(arrow_repos(nightly = TRUE), c(ours, cran))
expect_identical(arrow_repos(c(cran, ours), nightly = TRUE), c(ours, cran))
expect_identical(arrow_repos(c(ours, other), nightly = TRUE), c(ours, other))
withr::with_options(opts, {
expect_identical(arrow_repos(), cran)
expect_identical(arrow_repos(c(cran, ours)), cran)
expect_identical(arrow_repos(c(ours, other)), other)
expect_identical(arrow_repos(nightly = TRUE), c(ours, cran))
expect_identical(arrow_repos(c(cran, ours), nightly = TRUE), c(ours, cran))
expect_identical(arrow_repos(c(ours, other), nightly = TRUE), c(ours, other))
})
})
})
26 changes: 26 additions & 0 deletions r/tests/testthat/test-scalar.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,29 @@ test_that("Scalar$ApproxEquals", {
expect_false(a$ApproxEquals(d))
expect_false(a$ApproxEquals(aa))
})

test_that("Handling string data with embedded nuls", {
raws <- as.raw(c(0x6d, 0x61, 0x00, 0x6e))
expect_error(
rawToChar(raws),
"embedded nul in string: 'ma\\0n'", # See?
fixed = TRUE
)
scalar_with_nul <- Scalar$create(raws, binary())$cast(utf8())
expect_error(
as.vector(scalar_with_nul),
"embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
fixed = TRUE
)

withr::with_options(list(arrow.skip_nul = TRUE), {
expect_warning(
expect_identical(
as.vector(scalar_with_nul),
"man"
),
"Stripping '\\0' (nul) from character vector",
fixed = TRUE
)
})
})